Post plotting refactor
This commit is contained in:
@@ -159,61 +159,40 @@ update_gene_names <- function(df, sgd_gene_list) {
|
|||||||
|
|
||||||
# Process either deletion and or reference strain(s)
|
# Process either deletion and or reference strain(s)
|
||||||
process_strains <- function(df) {
|
process_strains <- function(df) {
|
||||||
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
message("Processing strains")
|
||||||
|
|
||||||
for (concentration in unique(df$conc_num)) {
|
df %>%
|
||||||
message("Processing concentration: ", concentration)
|
group_by(conc_num) %>%
|
||||||
df_temp <- df %>% filter(conc_num == concentration)
|
mutate(
|
||||||
|
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
if (concentration > 0) {
|
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
max_l_theoretical <- df_temp %>% pull(max_L)
|
SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
|
||||||
|
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||||
df_temp <- df_temp %>%
|
ungroup()
|
||||||
mutate(
|
|
||||||
L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L), # Replace zero values with max theoretical
|
|
||||||
SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM), # Set SM flag
|
|
||||||
L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) # Cap L values
|
|
||||||
)
|
|
||||||
}
|
|
||||||
# Append the results of this concentration to df_strains
|
|
||||||
df_strains <- bind_rows(df_strains, df_temp)
|
|
||||||
}
|
|
||||||
|
|
||||||
return(df_strains)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate summary statistics for all variables
|
# Calculate summary statistics for all variables
|
||||||
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
||||||
# Replace 0 values with NA
|
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
mutate(across(all_of(variables), ~ifelse(. == 0, NA, .)))
|
mutate(across(all_of(variables), ~ ifelse(. == 0, NA, .)))
|
||||||
|
|
||||||
print("Head of df for summary stats calculations:")
|
|
||||||
print(head(df))
|
|
||||||
|
|
||||||
# Calculate summary statistics, including a single N based on L
|
|
||||||
summary_stats <- df %>%
|
summary_stats <- df %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
reframe(
|
summarise(
|
||||||
N = sum(!is.na(L)), # Single N based on L
|
N = sum(!is.na(L)),
|
||||||
across(all_of(variables), list(
|
across(all_of(variables), list(
|
||||||
mean = ~mean(., na.rm = TRUE),
|
mean = ~mean(., na.rm = TRUE),
|
||||||
median = ~median(., na.rm = TRUE),
|
median = ~median(., na.rm = TRUE),
|
||||||
max = ~max(., na.rm = TRUE),
|
max = ~max(., na.rm = TRUE),
|
||||||
min = ~min(., na.rm = TRUE),
|
min = ~min(., na.rm = TRUE),
|
||||||
sd = ~sd(., na.rm = TRUE),
|
sd = ~sd(., na.rm = TRUE),
|
||||||
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1) # TODO unsure why this is - 1
|
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)
|
||||||
), .names = "{.fn}_{.col}")
|
), .names = "{.fn}_{.col}")
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Summary stats:")
|
df_cleaned <- df %>%
|
||||||
print(head(summary_stats))
|
select(-any_of(names(summary_stats)))
|
||||||
|
|
||||||
# Remove existing stats columns from df if they already exist
|
|
||||||
stat_columns <- setdiff(names(summary_stats), group_vars)
|
|
||||||
df_cleaned <- df %>% select(-any_of(stat_columns))
|
|
||||||
|
|
||||||
# Join the summary stats back to the cleaned original dataframe
|
|
||||||
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
||||||
|
|
||||||
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
||||||
@@ -222,11 +201,6 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
# Calculate interaction scores
|
# Calculate interaction scores
|
||||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
# if (nrow(df) == 0) {
|
|
||||||
# message("Dataframe is empty after filtering")
|
|
||||||
# return(NULL) # Or handle the empty dataframe case as needed
|
|
||||||
# }
|
|
||||||
|
|
||||||
# Calculate total concentration variables
|
# Calculate total concentration variables
|
||||||
total_conc_num <- length(unique(df$conc_num))
|
total_conc_num <- length(unique(df$conc_num))
|
||||||
num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1
|
num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1
|
||||||
@@ -318,10 +292,11 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
print("Calculating interaction scores part 2")
|
print("Calculating interaction scores part 2")
|
||||||
interaction_scores_all <- interaction_scores %>%
|
interaction_scores_all <- interaction_scores %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
mutate(lm_score_L = max_conc * coef(lm(Delta_L ~ conc_num_factor))[2] + coef(lm(Delta_L ~ conc_num_factor))[1],
|
mutate(
|
||||||
lm_score_K = max_conc * coef(lm(Delta_K ~ conc_num_factor))[2] + coef(lm(Delta_K ~ conc_num_factor))[1],
|
lm_score_L = max_conc * coef(lm(Delta_L ~ conc_num_factor))[2] + coef(lm(Delta_L ~ conc_num_factor))[1],
|
||||||
lm_score_r = max_conc * coef(lm(Delta_r ~ conc_num_factor))[2] + coef(lm(Delta_r ~ conc_num_factor))[1],
|
lm_score_K = max_conc * coef(lm(Delta_K ~ conc_num_factor))[2] + coef(lm(Delta_K ~ conc_num_factor))[1],
|
||||||
lm_score_AUC = max_conc * coef(lm(Delta_AUC ~ conc_num_factor))[2] + coef(lm(Delta_AUC ~ conc_num_factor))[1]) %>%
|
lm_score_r = max_conc * coef(lm(Delta_r ~ conc_num_factor))[2] + coef(lm(Delta_r ~ conc_num_factor))[1],
|
||||||
|
lm_score_AUC = max_conc * coef(lm(Delta_AUC ~ conc_num_factor))[2] + coef(lm(Delta_AUC ~ conc_num_factor))[1]) %>%
|
||||||
mutate(
|
mutate(
|
||||||
Avg_Zscore_L = sum(Z_Shift_L, na.rm = TRUE) / num_non_removed_concs,
|
Avg_Zscore_L = sum(Z_Shift_L, na.rm = TRUE) / num_non_removed_concs,
|
||||||
Avg_Zscore_K = sum(Z_Shift_K, na.rm = TRUE) / num_non_removed_concs,
|
Avg_Zscore_K = sum(Z_Shift_K, na.rm = TRUE) / num_non_removed_concs,
|
||||||
@@ -338,11 +313,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
|
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_var = "conc_num",
|
generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_var = "conc_num",
|
||||||
title = "", x_label = NULL, y_label = NULL, ylim_vals = NULL) {
|
title = "", x_label = NULL, y_label = NULL, ylim_vals = NULL, annotations = NULL) {
|
||||||
|
|
||||||
# Initialize plot with dynamic x and color variables
|
|
||||||
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
|
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
|
||||||
|
|
||||||
if (!is.null(y_var)) {
|
if (!is.null(y_var)) {
|
||||||
@@ -353,12 +326,14 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_
|
|||||||
y_sd_col <- paste0("sd_", y_var)
|
y_sd_col <- paste0("sd_", y_var)
|
||||||
|
|
||||||
plot <- plot + geom_point(shape = 3, size = 0.2) +
|
plot <- plot + geom_point(shape = 3, size = 0.2) +
|
||||||
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
|
geom_errorbar(aes(
|
||||||
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
|
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
|
||||||
|
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
|
||||||
|
width = 0.1) +
|
||||||
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
|
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add the required plot type (scatter, box, density, bar)
|
# Add the specified plot type
|
||||||
plot <- switch(plot_type,
|
plot <- switch(plot_type,
|
||||||
"box" = plot + geom_boxplot(),
|
"box" = plot + geom_boxplot(),
|
||||||
"density" = plot + geom_density(),
|
"density" = plot + geom_density(),
|
||||||
@@ -366,20 +341,27 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_
|
|||||||
plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
|
plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply y-axis limits if specified
|
|
||||||
if (!is.null(ylim_vals)) {
|
if (!is.null(ylim_vals)) {
|
||||||
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add title and axis labels
|
|
||||||
plot <- plot + ggtitle(title) + theme_publication()
|
plot <- plot + ggtitle(title) + theme_publication()
|
||||||
|
|
||||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||||
|
|
||||||
|
if (!is.null(annotations)) {
|
||||||
|
for (annotation in annotations) {
|
||||||
|
plot <- plot + annotate("text",
|
||||||
|
x = annotation$x,
|
||||||
|
y = annotation$y,
|
||||||
|
label = annotation$label)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return(plot)
|
return(plot)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
||||||
plots <- list()
|
plots <- list()
|
||||||
|
|
||||||
@@ -391,24 +373,16 @@ generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
|||||||
message("Generating plots for dataframe")
|
message("Generating plots for dataframe")
|
||||||
|
|
||||||
for (config in plot_configs) {
|
for (config in plot_configs) {
|
||||||
# Generate the plot using the configurations
|
|
||||||
plot <- generate_plot(
|
plot <- generate_plot(
|
||||||
df = df,
|
df = df,
|
||||||
x_var = config$x_var,
|
x_var = config$x_var,
|
||||||
y_var = config$y_var,
|
y_var = config$y_var,
|
||||||
plot_type = config$plot_type,
|
plot_type = config$plot_type,
|
||||||
title = config$title,
|
title = config$title,
|
||||||
ylim_vals = config$ylim_vals
|
ylim_vals = config$ylim_vals,
|
||||||
|
annotations = config$annotations
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add custom annotations if provided
|
|
||||||
if (!is.null(config$annotations)) {
|
|
||||||
for (annotation in config$annotations) {
|
|
||||||
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Store the plot with variable name and plot type
|
|
||||||
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
|
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -416,16 +390,12 @@ generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
|||||||
save_plots(file_prefix, plots, output_dir)
|
save_plots(file_prefix, plots, output_dir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Standardized saving of plots
|
||||||
|
|
||||||
# Ensure all plots are saved and printed to PDF
|
|
||||||
save_plots <- function(file_name, plot_list, output_dir) {
|
save_plots <- function(file_name, plot_list, output_dir) {
|
||||||
# Save all plots to a single PDF
|
|
||||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||||
lapply(plot_list, print)
|
lapply(plot_list, print)
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
# Save each plot as an interactive HTML file
|
|
||||||
lapply(names(plot_list), function(plot_name) {
|
lapply(names(plot_list), function(plot_name) {
|
||||||
pgg <- tryCatch({
|
pgg <- tryCatch({
|
||||||
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
||||||
@@ -436,109 +406,79 @@ save_plots <- function(file_name, plot_list, output_dir) {
|
|||||||
})
|
})
|
||||||
|
|
||||||
if (!is.null(pgg)) {
|
if (!is.null(pgg)) {
|
||||||
saveWidget(pgg, file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")), selfcontained = TRUE)
|
saveWidget(pgg,
|
||||||
|
file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")),
|
||||||
|
selfcontained = TRUE)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interaction_plot_configs <- function(df, variables) {
|
||||||
|
plot_configs <- list()
|
||||||
|
|
||||||
interaction_plot_configs <- function(df, variable) {
|
for (variable in variables) {
|
||||||
ylim_vals <- switch(variable,
|
# Define the y-limits based on the variable being plotted
|
||||||
"L" = c(-65, 65),
|
ylim_vals <- switch(variable,
|
||||||
"K" = c(-65, 65),
|
"L" = c(-65, 65),
|
||||||
"r" = c(-0.65, 0.65),
|
"K" = c(-65, 65),
|
||||||
"AUC" = c(-6500, 6500),
|
"r" = c(-0.65, 0.65),
|
||||||
NULL
|
"AUC" = c(-6500, 6500)
|
||||||
)
|
|
||||||
|
|
||||||
wt_sd <- paste0("WT_sd_", variable)
|
|
||||||
delta_var <- paste0("Delta_", variable)
|
|
||||||
z_shift <- paste0("Z_Shift_", variable)
|
|
||||||
z_lm <- paste0("Z_lm_", variable)
|
|
||||||
|
|
||||||
list(
|
|
||||||
x_var = "conc_num_factor",
|
|
||||||
y_var = delta_var,
|
|
||||||
plot_type = "scatter",
|
|
||||||
title = paste("Scatter plot for", variable),
|
|
||||||
ylim_vals = ylim_vals,
|
|
||||||
annotations = list(
|
|
||||||
list(x = 1, y = max(ylim_vals) * 0.7, label = paste("ZShift =", round(df[[z_shift]], 2))),
|
|
||||||
list(x = 1, y = max(ylim_vals) * 0.5, label = paste("Z lm Score =", round(df[[z_lm]], 2))),
|
|
||||||
list(x = 1, y = min(ylim_vals) * 0.7, label = paste("NG =", df$NG)),
|
|
||||||
list(x = 1, y = min(ylim_vals) * 0.85, label = paste("DB =", df$DB)),
|
|
||||||
list(x = 1, y = min(ylim_vals) * 1.1, label = paste("SM =", df$SM))
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
generate_interaction_plots <- function(df, output_file) {
|
# Dynamically generate the column names for standard deviation and delta
|
||||||
message("Generating interaction plots")
|
wt_sd_col <- paste0("WT_sd_", variable)
|
||||||
|
delta_var <- paste0("Delta_", variable)
|
||||||
|
z_shift <- paste0("Z_Shift_", variable)
|
||||||
|
z_lm <- paste0("Z_lm_", variable)
|
||||||
|
|
||||||
# Variables to be plotted
|
# Set annotations for ZShift, Z lm Score, NG, DB, SM
|
||||||
variables <- c("L", "K", "r", "AUC")
|
annotations <- list(
|
||||||
ylims <- list(
|
list(x = 1, y = ifelse(variable == "L", 45, ifelse(variable == "K", 45,
|
||||||
L = c(0, 160),
|
ifelse(variable == "r", 0.45, 4500))), label = paste("ZShift =", round(df[[z_shift]], 2))),
|
||||||
K = c(-20, 160),
|
list(x = 1, y = ifelse(variable == "L", 25, ifelse(variable == "K", 25,
|
||||||
r = c(0, 1),
|
ifelse(variable == "r", 0.25, 2500))), label = paste("lm ZScore =", round(df[[z_lm]], 2))),
|
||||||
AUC = c(0, 12500)
|
list(x = 1, y = ifelse(variable == "L", -25, ifelse(variable == "K", -25,
|
||||||
)
|
ifelse(variable == "r", -0.25, -2500))), label = paste("NG =", df$NG)),
|
||||||
|
list(x = 1, y = ifelse(variable == "L", -35, ifelse(variable == "K", -35,
|
||||||
|
ifelse(variable == "r", -0.35, -3500))), label = paste("DB =", df$DB)),
|
||||||
|
list(x = 1, y = ifelse(variable == "L", -45, ifelse(variable == "K", -45,
|
||||||
|
ifelse(variable == "r", -0.45, -4500))), label = paste("SM =", df$SM))
|
||||||
|
)
|
||||||
|
|
||||||
plot_list <- list()
|
# Define the configuration for each variable (plot type, limits, labels)
|
||||||
|
plot_configs[[variable]] <- list(
|
||||||
# Generate plots for each variable using the existing plotting function
|
|
||||||
for (var in variables) {
|
|
||||||
plot <- generate_plot(
|
|
||||||
df = df,
|
|
||||||
x_var = "conc_num_factor",
|
x_var = "conc_num_factor",
|
||||||
y_var = var,
|
y_var = delta_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = paste("Scatter RF for", var, "with SD"),
|
title = paste(df$OrfRep[1], df$Gene[1], sep = " "),
|
||||||
ylim_vals = ylims[[var]]
|
ylim_vals = ylim_vals,
|
||||||
) +
|
annotations = annotations,
|
||||||
annotate("text", x = -0.25, y = ifelse(var == "L", 10, ifelse(var == "K", -5, 0.9)), label = "NG") +
|
error_bar = list(
|
||||||
annotate("text", x = -0.25, y = ifelse(var == "L", 5, ifelse(var == "K", -12.5, 0.8)), label = "DB") +
|
ymin = 0 - (2 * df[[wt_sd_col]]),
|
||||||
annotate("text", x = -0.25, y = ifelse(var == "L", 0, ifelse(var == "K", -20, 0.7)), label = "SM") +
|
ymax = 0 + (2 * df[[wt_sd_col]])
|
||||||
annotate("text", x = unique(df$conc_num_factor), y = ifelse(var == "L", 10, ifelse(var == "K", -5, 0.9)), label = df$NG) +
|
),
|
||||||
annotate("text", x = unique(df$conc_num_factor), y = ifelse(var == "L", 5, ifelse(var == "K", -12.5, 0.8)), label = df$DB) +
|
x_breaks = unique(df$conc_num_factor),
|
||||||
annotate("text", x = unique(df$conc_num_factor), y = ifelse(var == "L", 0, ifelse(var == "K", -20, 0.7)), label = df$SM)
|
x_labels = unique(as.character(df$conc_num)),
|
||||||
|
x_label = unique(df$Drug[1])
|
||||||
plot_list[[var]] <- plot
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Save plots in a PDF
|
return(plot_configs)
|
||||||
pdf(output_file, width = 16, height = 16)
|
|
||||||
grid.arrange(
|
|
||||||
plot_list$L, plot_list$K,
|
|
||||||
plot_list$r, plot_list$AUC,
|
|
||||||
ncol = 2, nrow = 2
|
|
||||||
)
|
|
||||||
dev.off()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
|
correlation_plot_configs <- function(df, lm_list, lm_summaries) {
|
||||||
lm_summaries <- lapply(lm_list, summary)
|
lapply(seq_along(lm_list), function(i) {
|
||||||
|
|
||||||
# Define plot titles and annotation based on R-squared values from the lm_list
|
|
||||||
plot_configs <- lapply(seq_along(lm_list), function(i) {
|
|
||||||
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
|
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
|
||||||
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
|
|
||||||
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
|
|
||||||
|
|
||||||
list(
|
list(
|
||||||
x_var = names(lm_list)[i][1],
|
x_var = names(lm_list)[i][1],
|
||||||
y_var = names(lm_list)[i][2],
|
y_var = names(lm_list)[i][2],
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = plot_titles[i],
|
title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]),
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Generate and save the plots using the new system
|
|
||||||
generate_and_save_plots(df_na_rm, output_dir, "Correlation_CPPs", plot_configs)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Adjust missing values and calculate ranks
|
# Adjust missing values and calculate ranks
|
||||||
adjust_missing_and_rank <- function(df, variables) {
|
adjust_missing_and_rank <- function(df, variables) {
|
||||||
|
|
||||||
@@ -553,7 +493,6 @@ adjust_missing_and_rank <- function(df, variables) {
|
|||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
|
generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
|
||||||
x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
|
x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
|
||||||
plot_list <- list()
|
plot_list <- list()
|
||||||
@@ -590,40 +529,6 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num",
|
|||||||
save_plots(file_prefix, plot_list, output_dir)
|
save_plots(file_prefix, plot_list, output_dir)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to generate rank plots for the provided dataframe
|
|
||||||
# create_ranked_plots <- function(df, output_dir) {
|
|
||||||
|
|
||||||
# # List of variables for which we need to generate rank plots
|
|
||||||
# variables <- c("L", "K", "r", "AUC")
|
|
||||||
|
|
||||||
# # Adjust missing values and calculate ranks
|
|
||||||
# df_adjusted <- adjust_missing_and_rank(df, variables)
|
|
||||||
|
|
||||||
# # Generate rank plots
|
|
||||||
# for (var in variables) {
|
|
||||||
# plot_rank_avg <- generate_plot(
|
|
||||||
# df = df_adjusted,
|
|
||||||
# x_var = paste0(var, "_Rank"),
|
|
||||||
# y_var = paste0("Avg_Zscore_", var),
|
|
||||||
# plot_type = "scatter",
|
|
||||||
# title = paste("Average Z score vs Rank for", var)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# plot_rank_lm <- generate_plot(
|
|
||||||
# df = df_adjusted,
|
|
||||||
# x_var = paste0(var, "_Rank_lm"),
|
|
||||||
# y_var = paste0("Z_lm_", var),
|
|
||||||
# plot_type = "scatter",
|
|
||||||
# title = paste("Interaction Z score vs Rank for", var)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# save_plots(paste0("RankPlots_", var), list(
|
|
||||||
# plot_rank_avg = plot_rank_avg,
|
|
||||||
# plot_rank_lm = plot_rank_lm
|
|
||||||
# ), output_dir)
|
|
||||||
# }
|
|
||||||
# }
|
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
exp <- args$experiments[[exp_name]]
|
exp <- args$experiments[[exp_name]]
|
||||||
@@ -807,8 +712,10 @@ main <- function() {
|
|||||||
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Create interaction plots
|
# Create interaction plots
|
||||||
generate_interaction_plots(df_reference, output_file = file.path(output_dir, "RF_InteractionPlots.pdf"))
|
deletion_plot_configs <- interaction_plot_configs(df_reference, variables)
|
||||||
generate_interaction_plots(df_deletion, output_file = file.path(output_dir, "InteractionPlots.pdf"))
|
generate_and_save_plots(zscores_calculations, out_dir, "RF_InteractionPlots", deletion_plot_configs)
|
||||||
|
deletion_plot_configs <- interaction_plot_configs(df_deletion, variables)
|
||||||
|
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
|
||||||
|
|
||||||
# Define conditions for enhancers and suppressors
|
# Define conditions for enhancers and suppressors
|
||||||
# TODO Add to study config file?
|
# TODO Add to study config file?
|
||||||
@@ -861,24 +768,13 @@ main <- function() {
|
|||||||
|
|
||||||
# Interaction plots for reference strain
|
# Interaction plots for reference strain
|
||||||
variables <- c("L", "K", "r", "AUC")
|
variables <- c("L", "K", "r", "AUC")
|
||||||
reference_plot_configs <- lapply(variables, function(var) {
|
reference_plot_configs <- interaction_plot_configs(df_reference, variables)
|
||||||
interaction_plot_configs(zscores_calculations_reference, var)
|
generate_and_save_plots(zscores_calculation_reference, out_dir, "RF_InteractionPlots", reference_plot_configs)
|
||||||
})
|
|
||||||
|
|
||||||
generate_and_save_plots(zscores_calculations_reference, out_dir, "RF_InteractionPlots", reference_plot_configs)
|
|
||||||
|
|
||||||
# Interaction plots for deletion strains
|
|
||||||
deletion_plot_configs <- lapply(variables, function(var) {
|
|
||||||
interaction_plot_configs(zscores_calculations, var)
|
|
||||||
})
|
|
||||||
|
|
||||||
|
deletion_plot_configs <- interaction_plot_configs(df_deletion, variables)
|
||||||
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
|
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
|
||||||
|
|
||||||
# Apply filtering to remove NA values before further analysis
|
# Correlation plots
|
||||||
zscores_interactions_filtered <- zscores_interactions %>%
|
|
||||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
|
||||||
|
|
||||||
|
|
||||||
lm_list <- list(
|
lm_list <- list(
|
||||||
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||||
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||||
@@ -888,26 +784,10 @@ main <- function() {
|
|||||||
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
|
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate cpp correlation plots
|
lm_summaries <- lapply(lm_list, summary)
|
||||||
correlation_plot_config <- list(
|
correlation_plot_config <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
|
||||||
list(x_var = "Z_lm_L", y_var = "Z_lm_K", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_K",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
|
||||||
list(x_var = "Z_lm_L", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_r",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
|
||||||
list(x_var = "Z_lm_L", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_AUC",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
|
||||||
list(x_var = "Z_lm_K", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_r",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
|
||||||
list(x_var = "Z_lm_K", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_AUC",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
|
||||||
list(x_var = "Z_lm_r", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_r and Z_lm_AUC",
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), )
|
|
||||||
)
|
|
||||||
generate_and_save_plots(zscores_interactions_filtered, output_dir, "CorrelationPlots", correlation_plot_config)
|
generate_and_save_plots(zscores_interactions_filtered, output_dir, "CorrelationPlots", correlation_plot_config)
|
||||||
|
|
||||||
# Generate cpp correlation plots
|
|
||||||
#generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
|
|
||||||
|
|
||||||
# Generate ranked plots
|
# Generate ranked plots
|
||||||
rank_plot_config <- list(
|
rank_plot_config <- list(
|
||||||
list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
|
list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
|
||||||
@@ -917,9 +797,6 @@ main <- function() {
|
|||||||
)
|
)
|
||||||
# Generate and save rank plots using the existing plotting framework
|
# Generate and save rank plots using the existing plotting framework
|
||||||
generate_and_save_plots(zscores_interactions_filtered, output_dir, "RankPlots", rank_plot_config)
|
generate_and_save_plots(zscores_interactions_filtered, output_dir, "RankPlots", rank_plot_config)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user