Convert tidy symbols

This commit is contained in:
2024-10-06 21:56:51 -04:00
parent 214397e6ee
commit 642c74fa62

View File

@@ -528,7 +528,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
# Filter NAs
if (!is.null(config$filter_na) && config$filter_na) {
df <- df %>%
filter(!is.na(.data[[config$y_var]]))
filter(!is.na(!!sym(config$y_var)))
}
# TODO for now skip all NA plots NA data
@@ -539,17 +539,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
}
# Create initial aes mappings for all plot types
aes_mapping <- aes(x = .data[[config$x_var]]) # required
aes_mapping <- aes(x = !!sym(config$x_var)) # required
if (!is.null(config$y_var)) {
aes_mapping <- modifyList(aes_mapping, aes(y = .data[[config$y_var]])) # optional for density/bar plots
aes_mapping <- modifyList(aes_mapping, aes(y = !!sym(config$y_var))) # optional for density/bar plots
}
if (!is.null(config$color_var)) {
aes_mapping <- modifyList(aes_mapping, aes(color = .data[[config$color_var]])) # dynamic color_var
aes_mapping <- modifyList(aes_mapping, aes(color = !!sym(config$color_var))) # dynamic color_var
} else if (!is.null(config$color)) {
aes_mapping <- modifyList(aes_mapping, aes(color = config$color)) # static color
}
if (config$plot_type == "bar" && !is.null(config$color_var)) {
aes_mapping <- modifyList(aes_mapping, aes(fill = .data[[config$color_var]])) # only fill bar plots
aes_mapping <- modifyList(aes_mapping, aes(fill = !!sym(config$color_var))) # only fill bar plots
}
# Begin plot generation
@@ -642,12 +642,11 @@ generate_scatter_plot <- function(plot, config) {
# Add a cyan point for the reference data for correlation plots
if (!is.null(config$cyan_points) && config$cyan_points) {
plot <- plot + geom_point(
aes(x = !!sym(config$x_var), y = !!sym(config$y_var)),
data = config$df_reference,
mapping = aes(x = .data[[config$x_var]], y = .data[[config$y_var]]),
color = "cyan",
shape = 3,
size = 0.5,
inherit.aes = FALSE
size = 0.5
)
}
@@ -685,9 +684,9 @@ generate_scatter_plot <- function(plot, config) {
# Only use color_var if it's present in the dataframe
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
color = .data[[config$color_var]]
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col),
color = !!sym(config$color_var)
),
linewidth = 0.1
)
@@ -695,8 +694,8 @@ generate_scatter_plot <- function(plot, config) {
# If color_var is missing, fall back to a default color or none
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)
),
color = config$error_bar_params$color, # use the provided color or default
linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
@@ -708,16 +707,14 @@ generate_scatter_plot <- function(plot, config) {
if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
if (!is.null(config$error_bar_params$color)) {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)),
color = config$error_bar_params$color,
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
shape = 16
)
} else {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)),
shape = 16
)
}
}
@@ -726,30 +723,30 @@ generate_scatter_plot <- function(plot, config) {
# Add linear regression line if specified
if (!is.null(config$lm_line)) {
# Extract necessary values
x_min <- config$lm_line$x_min
x_max <- config$lm_line$x_max
intercept <- config$lm_line$intercept
slope <- config$lm_line$slope
intercept <- config$lm_line$intercept # required
slope <- config$lm_line$slope # required
xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]])))
xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]])))
color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue")
linewidth <- ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
y_min <- intercept + slope * x_min
y_max <- intercept + slope * x_max
ymin <- intercept + slope * xmin
ymax <- intercept + slope * xmax
# Ensure y-values are within y-limits (if any)
if (!is.null(config$ylim_vals)) {
y_min_within_limits <- y_min >= config$ylim_vals[1] && y_min <= config$ylim_vals[2]
y_max_within_limits <- y_max >= config$ylim_vals[1] && y_max <= config$ylim_vals[2]
ymin_within_limits <- ymin >= config$ylim_vals[1] && ymin <= config$ylim_vals[2]
ymax_within_limits <- ymax >= config$ylim_vals[1] && ymax <= config$ylim_vals[2]
# Adjust or skip based on whether the values fall within limits
if (y_min_within_limits && y_max_within_limits) {
if (ymin_within_limits && ymax_within_limits) {
plot <- plot + annotate(
"segment",
x = x_min,
xend = x_max,
y = y_min,
yend = y_max,
x = xmin,
xend = xmax,
y = ymin,
yend = ymax,
color = color,
linewidth = linewidth
linewidth = linewidth,
)
} else {
message("Skipping linear regression line due to y-values outside of limits")
@@ -758,10 +755,10 @@ generate_scatter_plot <- function(plot, config) {
# If no y-limits are provided, proceed with the annotation
plot <- plot + annotate(
"segment",
x = x_min,
xend = x_max,
y = y_min,
yend = y_max,
x = xmin,
xend = xmax,
y = ymin,
yend = ymax,
color = color,
linewidth = linewidth
)
@@ -837,7 +834,7 @@ generate_scatter_plot <- function(plot, config) {
generate_boxplot <- function(plot, config) {
# Convert x_var to a factor within aes mapping
plot <- plot + geom_boxplot(aes(x = factor(.data[[config$x_var]])))
plot <- plot + geom_boxplot(aes(x = factor(!!sym(config$x_var))))
# Customize X-axis if specified
if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
@@ -871,7 +868,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
df_plot <- if (stage == "before") df_before else df_after
# Check for non-finite values in the y-variable
# df_plot_filtered <- df_plot %>% filter(is.finite(.data[[var]]))
# df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var)))
# Adjust settings based on plot_type
plot_config <- list(
@@ -950,7 +947,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
)
for (x_val in unique(df_summary$conc_num_factor_factor)) {
current_df <- df_summary %>% filter(.data[[plot_config$x_var]] == x_val)
current_df <- df_summary %>% filter(!!sym(plot_config$x_var) == x_val)
annotations <- append(annotations, list(
list(x = x_val, y = y_limits[1] + 0.08 * y_span, label = first(current_df$NG, default = 0), size = 4),
list(x = x_val, y = y_limits[1] + 0.04 * y_span, label = first(current_df$DB, default = 0), size = 4),
@@ -1007,9 +1004,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
# Anti-filter to select out-of-bounds rows
out_of_bounds <- group_data %>%
filter(is.na(.data[[y_var_name]]) |
.data[[y_var_name]] < y_limits[1] |
.data[[y_var_name]] > y_limits[2])
filter(is.na(!!sym(y_var_name)) |
!!sym(y_var_name) < y_limits[1] |
!!sym(y_var_name) > y_limits[2])
if (nrow(out_of_bounds) > 0) {
message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]",
@@ -1019,9 +1016,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
# Do the actual filtering
group_data_filtered <- group_data %>%
filter(!is.na(.data[[y_var_name]]) &
.data[[y_var_name]] >= y_limits[1] &
.data[[y_var_name]] <= y_limits[2])
filter(!is.na(!!sym(y_var_name)) &
!!sym(y_var_name) >= y_limits[1] &
!!sym(y_var_name) <= y_limits[2])
if (nrow(group_data_filtered) == 0) {
message("Insufficient data for plot: ", OrfRepTitle, " ", var)
@@ -1039,8 +1036,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
lm_intercept_col <- paste0("lm_intercept_", var)
lm_slope_col <- paste0("lm_slope_", var)
lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0)
lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0)
lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0)
lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0)
plot_config <- list(
df = group_data_filtered,
@@ -1073,12 +1070,10 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
x_labels = as.character(unique(group_data_filtered$conc_num)),
ylim_vals = y_limits,
lm_line = list(
intercept = lm_intercept_value,
slope = lm_slope_value,
intercept = lm_intercept,
slope = lm_slope,
color = "blue",
linewidth = 0.8,
x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)),
x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor))
linewidth = 0.8
)
)
delta_plot_configs <- append(delta_plot_configs, list(plot_config))
@@ -1194,6 +1189,12 @@ generate_correlation_plot_configs <- function(df, df_reference) {
list(x = "r", y = "AUC")
)
# Filter both dataframes for missing linear model zscores for plotting
df <- df %>%
filter(!is.na(Z_lm_L))
df_reference <- df_reference %>%
filter(!is.na(Z_lm_L))
plot_configs <- list()
# Iterate over the option to highlight cyan points (TRUE/FALSE)
@@ -1205,11 +1206,14 @@ generate_correlation_plot_configs <- function(df, df_reference) {
x_var <- paste0("Z_lm_", rel$x)
y_var <- paste0("Z_lm_", rel$y)
# Extract the R-squared, intercept, and slope from the df
relationship_name <- paste0(rel$x, "_vs_", rel$y)
intercept <- df[[paste0("lm_intercept_", rel$x)]]
slope <- df[[paste0("lm_slope_", rel$x)]]
r_squared <- df[[paste0("lm_R_squared_", rel$x)]]
# Find the max and min of both dataframes for printing linear regression line
xmin <- min(c(min(df[[x_var]], na.rm = TRUE), min(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
xmax <- max(c(max(df[[x_var]], na.rm = TRUE), max(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
# Extract the R-squared, intercept, and slope from the df (first value)
intercept <- df[[paste0("lm_intercept_", rel$x)]][1]
slope <- df[[paste0("lm_slope_", rel$x)]][1]
r_squared <- df[[paste0("lm_R_squared_", rel$x)]][1]
# Generate the label for the plot
plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
@@ -1232,7 +1236,10 @@ generate_correlation_plot_configs <- function(df, df_reference) {
lm_line = list(
intercept = intercept,
slope = slope,
color = "tomato3"
color = "tomato3",
linewidth = 0.8,
xmin = xmin,
xmax = xmax
),
color = "gray70",
filter_na = TRUE,
@@ -1489,10 +1496,10 @@ main <- function() {
)
# Parallelize background and quality control plot generation
furrr::future_map(plot_configs, function(config) {
generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
page_width = config$page_width, page_height = config$page_height)
}, .options = furrr_options(seed = TRUE))
# furrr::future_map(plot_configs, function(config) {
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
# page_width = config$page_width, page_height = config$page_height)
# }, .options = furrr_options(seed = TRUE))
# Loop over background strains
# TODO currently only tested against one strain, if we want to do multiple strains we'll
@@ -1605,77 +1612,77 @@ main <- function() {
# deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
# generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
message("Writing enhancer/suppressor csv files")
interaction_threshold <- 2 # TODO add to study config?
enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
enhancers_L <- df_interactions[enhancer_condition_L, ]
suppressors_L <- df_interactions[suppressor_condition_L, ]
enhancers_K <- df_interactions[enhancer_condition_K, ]
suppressors_K <- df_interactions[suppressor_condition_K, ]
enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
# message("Writing enhancer/suppressor csv files")
# interaction_threshold <- 2 # TODO add to study config?
# enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
# suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
# enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
# suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
# enhancers_L <- df_interactions[enhancer_condition_L, ]
# suppressors_L <- df_interactions[suppressor_condition_L, ]
# enhancers_K <- df_interactions[enhancer_condition_K, ]
# suppressors_K <- df_interactions[suppressor_condition_K, ]
# enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
# enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
# write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
# write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
# write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
# write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
# write.csv(enhancers_and_suppressors_L,
# file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
# write.csv(enhancers_and_suppressors_K,
# file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
message("Writing linear model enhancer/suppressor csv files")
lm_interaction_threshold <- 2 # TODO add to study config?
enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
# message("Writing linear model enhancer/suppressor csv files")
# lm_interaction_threshold <- 2 # TODO add to study config?
# enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
# suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
# enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
# suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
# write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
# write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
# write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
# write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
message("Generating rank plots")
rank_plot_configs <- generate_rank_plot_configs(
df_interactions,
is_lm = FALSE,
adjust = TRUE
)
generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
page_width = 18, page_height = 12)
# message("Generating rank plots")
# rank_plot_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = FALSE,
# adjust = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
# page_width = 18, page_height = 12)
message("Generating ranked linear model plots")
rank_lm_plot_configs <- generate_rank_plot_configs(
df_interactions,
is_lm = TRUE,
adjust = TRUE
)
generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
page_width = 18, page_height = 12)
# message("Generating ranked linear model plots")
# rank_lm_plot_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = TRUE,
# adjust = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
# page_width = 18, page_height = 12)
message("Generating overlapped ranked plots")
rank_plot_filtered_configs <- generate_rank_plot_configs(
df_interactions,
is_lm = FALSE,
adjust = FALSE,
filter_na = TRUE,
overlap_color = TRUE
)
generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
page_width = 18, page_height = 12)
# message("Generating overlapped ranked plots")
# rank_plot_filtered_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = FALSE,
# adjust = FALSE,
# filter_na = TRUE,
# overlap_color = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
# page_width = 18, page_height = 12)
message("Generating overlapped ranked linear model plots")
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
df_interactions,
is_lm = TRUE,
adjust = FALSE,
filter_na = TRUE,
overlap_color = TRUE
)
generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
page_width = 18, page_height = 12)
# message("Generating overlapped ranked linear model plots")
# rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = TRUE,
# adjust = FALSE,
# filter_na = TRUE,
# overlap_color = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
# page_width = 18, page_height = 12)
message("Generating correlation curve parameter pair plots")
correlation_plot_configs <- generate_correlation_plot_configs(