From 642c74fa622a390e3e4b9e3ef7ae828761d51bec Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 6 Oct 2024 21:56:51 -0400 Subject: [PATCH] Convert tidy symbols --- .../apps/r/calculate_interaction_zscores.R | 265 +++++++++--------- 1 file changed, 136 insertions(+), 129 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index f8ff7b35..9fe74dd4 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -528,7 +528,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width # Filter NAs if (!is.null(config$filter_na) && config$filter_na) { df <- df %>% - filter(!is.na(.data[[config$y_var]])) + filter(!is.na(!!sym(config$y_var))) } # TODO for now skip all NA plots NA data @@ -539,17 +539,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width } # Create initial aes mappings for all plot types - aes_mapping <- aes(x = .data[[config$x_var]]) # required + aes_mapping <- aes(x = !!sym(config$x_var)) # required if (!is.null(config$y_var)) { - aes_mapping <- modifyList(aes_mapping, aes(y = .data[[config$y_var]])) # optional for density/bar plots + aes_mapping <- modifyList(aes_mapping, aes(y = !!sym(config$y_var))) # optional for density/bar plots } if (!is.null(config$color_var)) { - aes_mapping <- modifyList(aes_mapping, aes(color = .data[[config$color_var]])) # dynamic color_var + aes_mapping <- modifyList(aes_mapping, aes(color = !!sym(config$color_var))) # dynamic color_var } else if (!is.null(config$color)) { aes_mapping <- modifyList(aes_mapping, aes(color = config$color)) # static color } if (config$plot_type == "bar" && !is.null(config$color_var)) { - aes_mapping <- modifyList(aes_mapping, aes(fill = .data[[config$color_var]])) # only fill bar plots + aes_mapping <- modifyList(aes_mapping, aes(fill = !!sym(config$color_var))) # only fill bar plots } # Begin plot generation @@ -642,12 +642,11 @@ generate_scatter_plot <- function(plot, config) { # Add a cyan point for the reference data for correlation plots if (!is.null(config$cyan_points) && config$cyan_points) { plot <- plot + geom_point( + aes(x = !!sym(config$x_var), y = !!sym(config$y_var)), data = config$df_reference, - mapping = aes(x = .data[[config$x_var]], y = .data[[config$y_var]]), color = "cyan", shape = 3, - size = 0.5, - inherit.aes = FALSE + size = 0.5 ) } @@ -685,9 +684,9 @@ generate_scatter_plot <- function(plot, config) { # Only use color_var if it's present in the dataframe plot <- plot + geom_errorbar( aes( - ymin = .data[[y_mean_col]] - .data[[y_sd_col]], - ymax = .data[[y_mean_col]] + .data[[y_sd_col]], - color = .data[[config$color_var]] + ymin = !!sym(y_mean_col) - !!sym(y_sd_col), + ymax = !!sym(y_mean_col) + !!sym(y_sd_col), + color = !!sym(config$color_var) ), linewidth = 0.1 ) @@ -695,8 +694,8 @@ generate_scatter_plot <- function(plot, config) { # If color_var is missing, fall back to a default color or none plot <- plot + geom_errorbar( aes( - ymin = .data[[y_mean_col]] - .data[[y_sd_col]], - ymax = .data[[y_mean_col]] + .data[[y_sd_col]] + ymin = !!sym(y_mean_col) - !!sym(y_sd_col), + ymax = !!sym(y_mean_col) + !!sym(y_sd_col) ), color = config$error_bar_params$color, # use the provided color or default linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth) @@ -708,16 +707,14 @@ generate_scatter_plot <- function(plot, config) { if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) { if (!is.null(config$error_bar_params$color)) { plot <- plot + geom_point( - mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings + aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)), color = config$error_bar_params$color, - shape = 16, - inherit.aes = FALSE # Prevent overriding global aesthetics + shape = 16 ) } else { plot <- plot + geom_point( - mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings - shape = 16, - inherit.aes = FALSE # Prevent overriding global aesthetics + aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)), + shape = 16 ) } } @@ -726,30 +723,30 @@ generate_scatter_plot <- function(plot, config) { # Add linear regression line if specified if (!is.null(config$lm_line)) { # Extract necessary values - x_min <- config$lm_line$x_min - x_max <- config$lm_line$x_max - intercept <- config$lm_line$intercept - slope <- config$lm_line$slope + intercept <- config$lm_line$intercept # required + slope <- config$lm_line$slope # required + xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]]))) + xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]]))) color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue") linewidth <- ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1) - y_min <- intercept + slope * x_min - y_max <- intercept + slope * x_max + ymin <- intercept + slope * xmin + ymax <- intercept + slope * xmax # Ensure y-values are within y-limits (if any) if (!is.null(config$ylim_vals)) { - y_min_within_limits <- y_min >= config$ylim_vals[1] && y_min <= config$ylim_vals[2] - y_max_within_limits <- y_max >= config$ylim_vals[1] && y_max <= config$ylim_vals[2] + ymin_within_limits <- ymin >= config$ylim_vals[1] && ymin <= config$ylim_vals[2] + ymax_within_limits <- ymax >= config$ylim_vals[1] && ymax <= config$ylim_vals[2] # Adjust or skip based on whether the values fall within limits - if (y_min_within_limits && y_max_within_limits) { + if (ymin_within_limits && ymax_within_limits) { plot <- plot + annotate( "segment", - x = x_min, - xend = x_max, - y = y_min, - yend = y_max, + x = xmin, + xend = xmax, + y = ymin, + yend = ymax, color = color, - linewidth = linewidth + linewidth = linewidth, ) } else { message("Skipping linear regression line due to y-values outside of limits") @@ -758,10 +755,10 @@ generate_scatter_plot <- function(plot, config) { # If no y-limits are provided, proceed with the annotation plot <- plot + annotate( "segment", - x = x_min, - xend = x_max, - y = y_min, - yend = y_max, + x = xmin, + xend = xmax, + y = ymin, + yend = ymax, color = color, linewidth = linewidth ) @@ -837,7 +834,7 @@ generate_scatter_plot <- function(plot, config) { generate_boxplot <- function(plot, config) { # Convert x_var to a factor within aes mapping - plot <- plot + geom_boxplot(aes(x = factor(.data[[config$x_var]]))) + plot <- plot + geom_boxplot(aes(x = factor(!!sym(config$x_var)))) # Customize X-axis if specified if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { @@ -871,7 +868,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df df_plot <- if (stage == "before") df_before else df_after # Check for non-finite values in the y-variable - # df_plot_filtered <- df_plot %>% filter(is.finite(.data[[var]])) + # df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var))) # Adjust settings based on plot_type plot_config <- list( @@ -950,7 +947,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) ) for (x_val in unique(df_summary$conc_num_factor_factor)) { - current_df <- df_summary %>% filter(.data[[plot_config$x_var]] == x_val) + current_df <- df_summary %>% filter(!!sym(plot_config$x_var) == x_val) annotations <- append(annotations, list( list(x = x_val, y = y_limits[1] + 0.08 * y_span, label = first(current_df$NG, default = 0), size = 4), list(x = x_val, y = y_limits[1] + 0.04 * y_span, label = first(current_df$DB, default = 0), size = 4), @@ -1007,9 +1004,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) # Anti-filter to select out-of-bounds rows out_of_bounds <- group_data %>% - filter(is.na(.data[[y_var_name]]) | - .data[[y_var_name]] < y_limits[1] | - .data[[y_var_name]] > y_limits[2]) + filter(is.na(!!sym(y_var_name)) | + !!sym(y_var_name) < y_limits[1] | + !!sym(y_var_name) > y_limits[2]) if (nrow(out_of_bounds) > 0) { message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]", @@ -1019,9 +1016,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) # Do the actual filtering group_data_filtered <- group_data %>% - filter(!is.na(.data[[y_var_name]]) & - .data[[y_var_name]] >= y_limits[1] & - .data[[y_var_name]] <= y_limits[2]) + filter(!is.na(!!sym(y_var_name)) & + !!sym(y_var_name) >= y_limits[1] & + !!sym(y_var_name) <= y_limits[2]) if (nrow(group_data_filtered) == 0) { message("Insufficient data for plot: ", OrfRepTitle, " ", var) @@ -1039,8 +1036,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) lm_intercept_col <- paste0("lm_intercept_", var) lm_slope_col <- paste0("lm_slope_", var) - lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0) - lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0) + lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0) + lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0) plot_config <- list( df = group_data_filtered, @@ -1073,12 +1070,10 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) x_labels = as.character(unique(group_data_filtered$conc_num)), ylim_vals = y_limits, lm_line = list( - intercept = lm_intercept_value, - slope = lm_slope_value, + intercept = lm_intercept, + slope = lm_slope, color = "blue", - linewidth = 0.8, - x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)), - x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor)) + linewidth = 0.8 ) ) delta_plot_configs <- append(delta_plot_configs, list(plot_config)) @@ -1194,6 +1189,12 @@ generate_correlation_plot_configs <- function(df, df_reference) { list(x = "r", y = "AUC") ) + # Filter both dataframes for missing linear model zscores for plotting + df <- df %>% + filter(!is.na(Z_lm_L)) + df_reference <- df_reference %>% + filter(!is.na(Z_lm_L)) + plot_configs <- list() # Iterate over the option to highlight cyan points (TRUE/FALSE) @@ -1205,11 +1206,14 @@ generate_correlation_plot_configs <- function(df, df_reference) { x_var <- paste0("Z_lm_", rel$x) y_var <- paste0("Z_lm_", rel$y) - # Extract the R-squared, intercept, and slope from the df - relationship_name <- paste0(rel$x, "_vs_", rel$y) - intercept <- df[[paste0("lm_intercept_", rel$x)]] - slope <- df[[paste0("lm_slope_", rel$x)]] - r_squared <- df[[paste0("lm_R_squared_", rel$x)]] + # Find the max and min of both dataframes for printing linear regression line + xmin <- min(c(min(df[[x_var]], na.rm = TRUE), min(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE) + xmax <- max(c(max(df[[x_var]], na.rm = TRUE), max(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE) + + # Extract the R-squared, intercept, and slope from the df (first value) + intercept <- df[[paste0("lm_intercept_", rel$x)]][1] + slope <- df[[paste0("lm_slope_", rel$x)]][1] + r_squared <- df[[paste0("lm_R_squared_", rel$x)]][1] # Generate the label for the plot plot_label <- paste("Interaction", rel$x, "vs.", rel$y) @@ -1232,7 +1236,10 @@ generate_correlation_plot_configs <- function(df, df_reference) { lm_line = list( intercept = intercept, slope = slope, - color = "tomato3" + color = "tomato3", + linewidth = 0.8, + xmin = xmin, + xmax = xmax ), color = "gray70", filter_na = TRUE, @@ -1489,10 +1496,10 @@ main <- function() { ) # Parallelize background and quality control plot generation - furrr::future_map(plot_configs, function(config) { - generate_and_save_plots(config$out_dir, config$filename, config$plot_configs, - page_width = config$page_width, page_height = config$page_height) - }, .options = furrr_options(seed = TRUE)) + # furrr::future_map(plot_configs, function(config) { + # generate_and_save_plots(config$out_dir, config$filename, config$plot_configs, + # page_width = config$page_width, page_height = config$page_height) + # }, .options = furrr_options(seed = TRUE)) # Loop over background strains # TODO currently only tested against one strain, if we want to do multiple strains we'll @@ -1605,77 +1612,77 @@ main <- function() { # deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion") # generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16) - message("Writing enhancer/suppressor csv files") - interaction_threshold <- 2 # TODO add to study config? - enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold - suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold - enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold - suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold - enhancers_L <- df_interactions[enhancer_condition_L, ] - suppressors_L <- df_interactions[suppressor_condition_L, ] - enhancers_K <- df_interactions[enhancer_condition_K, ] - suppressors_K <- df_interactions[suppressor_condition_K, ] - enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ] - enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ] - write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE) - write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE) - write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE) - write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) - write.csv(enhancers_and_suppressors_L, - file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE) - write.csv(enhancers_and_suppressors_K, - file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE) + # message("Writing enhancer/suppressor csv files") + # interaction_threshold <- 2 # TODO add to study config? + # enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold + # suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold + # enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold + # suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold + # enhancers_L <- df_interactions[enhancer_condition_L, ] + # suppressors_L <- df_interactions[suppressor_condition_L, ] + # enhancers_K <- df_interactions[enhancer_condition_K, ] + # suppressors_K <- df_interactions[suppressor_condition_K, ] + # enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ] + # enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ] + # write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE) + # write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE) + # write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE) + # write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) + # write.csv(enhancers_and_suppressors_L, + # file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE) + # write.csv(enhancers_and_suppressors_K, + # file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE) - message("Writing linear model enhancer/suppressor csv files") - lm_interaction_threshold <- 2 # TODO add to study config? - enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ] - suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ] - enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ] - suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ] - write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE) - write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE) - write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE) - write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE) + # message("Writing linear model enhancer/suppressor csv files") + # lm_interaction_threshold <- 2 # TODO add to study config? + # enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ] + # suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ] + # enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ] + # suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ] + # write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE) + # write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE) + # write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE) + # write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE) - message("Generating rank plots") - rank_plot_configs <- generate_rank_plot_configs( - df_interactions, - is_lm = FALSE, - adjust = TRUE - ) - generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, - page_width = 18, page_height = 12) + # message("Generating rank plots") + # rank_plot_configs <- generate_rank_plot_configs( + # df_interactions, + # is_lm = FALSE, + # adjust = TRUE + # ) + # generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, + # page_width = 18, page_height = 12) - message("Generating ranked linear model plots") - rank_lm_plot_configs <- generate_rank_plot_configs( - df_interactions, - is_lm = TRUE, - adjust = TRUE - ) - generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, - page_width = 18, page_height = 12) + # message("Generating ranked linear model plots") + # rank_lm_plot_configs <- generate_rank_plot_configs( + # df_interactions, + # is_lm = TRUE, + # adjust = TRUE + # ) + # generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, + # page_width = 18, page_height = 12) - message("Generating overlapped ranked plots") - rank_plot_filtered_configs <- generate_rank_plot_configs( - df_interactions, - is_lm = FALSE, - adjust = FALSE, - filter_na = TRUE, - overlap_color = TRUE - ) - generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, - page_width = 18, page_height = 12) + # message("Generating overlapped ranked plots") + # rank_plot_filtered_configs <- generate_rank_plot_configs( + # df_interactions, + # is_lm = FALSE, + # adjust = FALSE, + # filter_na = TRUE, + # overlap_color = TRUE + # ) + # generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, + # page_width = 18, page_height = 12) - message("Generating overlapped ranked linear model plots") - rank_plot_lm_filtered_configs <- generate_rank_plot_configs( - df_interactions, - is_lm = TRUE, - adjust = FALSE, - filter_na = TRUE, - overlap_color = TRUE - ) - generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, - page_width = 18, page_height = 12) + # message("Generating overlapped ranked linear model plots") + # rank_plot_lm_filtered_configs <- generate_rank_plot_configs( + # df_interactions, + # is_lm = TRUE, + # adjust = FALSE, + # filter_na = TRUE, + # overlap_color = TRUE + # ) + # generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, + # page_width = 18, page_height = 12) message("Generating correlation curve parameter pair plots") correlation_plot_configs <- generate_correlation_plot_configs(