Before old rank plot removal
This commit is contained in:
@@ -9,6 +9,7 @@ suppressMessages({
|
|||||||
})
|
})
|
||||||
|
|
||||||
options(warn = 2, max.print = 1000)
|
options(warn = 2, max.print = 1000)
|
||||||
|
options(width = 10000)
|
||||||
|
|
||||||
# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
|
# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
|
||||||
soft_limit <- 30 * 1024 * 1024 * 1024
|
soft_limit <- 30 * 1024 * 1024 * 1024
|
||||||
@@ -156,7 +157,7 @@ update_gene_names <- function(df, sgd_gene_list) {
|
|||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Process strains (deletion and reference)
|
# Process deletion and/or reference strain(s)
|
||||||
process_strains <- function(df) {
|
process_strains <- function(df) {
|
||||||
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
||||||
|
|
||||||
@@ -187,6 +188,9 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
df <- df %>%
|
df <- df %>%
|
||||||
mutate(across(all_of(variables), ~ifelse(. == 0, NA, .)))
|
mutate(across(all_of(variables), ~ifelse(. == 0, NA, .)))
|
||||||
|
|
||||||
|
print("Head of df for summary stats calculations:")
|
||||||
|
print(head(df))
|
||||||
|
|
||||||
# Calculate summary statistics, including a single N based on L
|
# Calculate summary statistics, including a single N based on L
|
||||||
summary_stats <- df %>%
|
summary_stats <- df %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
@@ -195,13 +199,16 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
across(all_of(variables), list(
|
across(all_of(variables), list(
|
||||||
mean = ~mean(., na.rm = TRUE),
|
mean = ~mean(., na.rm = TRUE),
|
||||||
median = ~median(., na.rm = TRUE),
|
median = ~median(., na.rm = TRUE),
|
||||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
max = ~max(., na.rm = TRUE),
|
||||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
min = ~min(., na.rm = TRUE),
|
||||||
sd = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE), 0), # If N == 1, sd is set to 0
|
sd = ~sd(., na.rm = TRUE),
|
||||||
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), 0) # If N == 1, se is set to 0
|
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1) # TODO unsure why this is - 1
|
||||||
), .names = "{.fn}_{.col}")
|
), .names = "{.fn}_{.col}")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
print("Summary stats:")
|
||||||
|
print(head(summary_stats))
|
||||||
|
|
||||||
# Remove existing stats columns from df if they already exist
|
# Remove existing stats columns from df if they already exist
|
||||||
stat_columns <- setdiff(names(summary_stats), group_vars)
|
stat_columns <- setdiff(names(summary_stats), group_vars)
|
||||||
df_cleaned <- df %>% select(-any_of(stat_columns))
|
df_cleaned <- df %>% select(-any_of(stat_columns))
|
||||||
@@ -215,10 +222,10 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
# Calculate interaction scores
|
# Calculate interaction scores
|
||||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
if (nrow(df) == 0) {
|
# if (nrow(df) == 0) {
|
||||||
message("Dataframe is empty after filtering")
|
# message("Dataframe is empty after filtering")
|
||||||
return(NULL) # Or handle the empty dataframe case as needed
|
# return(NULL) # Or handle the empty dataframe case as needed
|
||||||
}
|
# }
|
||||||
|
|
||||||
# Calculate total concentration variables
|
# Calculate total concentration variables
|
||||||
total_conc_num <- length(unique(df$conc_num))
|
total_conc_num <- length(unique(df$conc_num))
|
||||||
@@ -304,6 +311,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
) %>%
|
) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
|
print("Interaction scores:")
|
||||||
|
print(head(interaction_scores))
|
||||||
|
|
||||||
# Calculate linear models and interaction scores per gene
|
# Calculate linear models and interaction scores per gene
|
||||||
print("Calculating interaction scores part 2")
|
print("Calculating interaction scores part 2")
|
||||||
interaction_scores_all <- interaction_scores %>%
|
interaction_scores_all <- interaction_scores %>%
|
||||||
@@ -329,6 +339,107 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#' Build a single ggplot from variables supplied as strings.
#'
#' @param df Data frame to plot.
#' @param x_var String naming the x column, or an R expression over columns
#'   of `df` (e.g. "as.factor(scan)").
#' @param y_var Optional string for the y variable; NULL for one-variable
#'   plots such as densities.
#' @param plot_type "box", "density" or "bar"; anything else draws a scatter
#'   plot with a linear fit.
#' @param color_var String naming the column mapped (as a factor) to colour.
#' @param title Plot title.
#' @param x_label,y_label Optional axis labels.
#' @param ylim_vals Optional y limits passed to coord_cartesian().
#' @return A ggplot object.
generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_var = "conc_num",
                          title = "", x_label = NULL, y_label = NULL, ylim_vals = NULL) {

  # An existing column name is used as a symbol. Anything else is parsed so
  # that strings like "as.factor(scan)" are evaluated against df instead of
  # being looked up as a column literally named "as.factor(scan)".
  as_aes_expr <- function(var) {
    if (var %in% names(df)) sym(var) else str2lang(var)
  }

  # Initialize plot with dynamic x and color variables
  plot <- ggplot(df, aes(x = !!as_aes_expr(x_var), color = as.factor(!!sym(color_var))))

  if (!is.null(y_var)) {
    plot <- plot + aes(y = !!as_aes_expr(y_var)) +
      geom_point(shape = 3, size = 0.2)

    # Overlay mean +/- sd only when df actually carries the summary columns;
    # referencing missing columns would make the plot fail when it is drawn.
    y_mean_col <- paste0("mean_", y_var)
    y_sd_col <- paste0("sd_", y_var)
    if (all(c(y_mean_col, y_sd_col) %in% names(df))) {
      plot <- plot +
        geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
                          ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
        geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
    }
  }

  # Add the required plot type (scatter, box, density, bar)
  plot <- switch(plot_type,
    "box" = plot + geom_boxplot(),
    "density" = plot + geom_density(),
    # stat = "identity" needs a y aesthetic; without one, count the rows
    "bar" = if (is.null(y_var)) plot + geom_bar() else plot + geom_bar(stat = "identity"),
    plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
  )

  # Apply y-axis limits if specified (zooms without dropping data)
  if (!is.null(ylim_vals)) {
    plot <- plot + coord_cartesian(ylim = ylim_vals)
  }

  # Add title and axis labels
  plot <- plot + ggtitle(title) + theme_publication()
  if (!is.null(x_label)) plot <- plot + xlab(x_label)
  if (!is.null(y_label)) plot <- plot + ylab(y_label)

  return(plot)
}
|
||||||
|
|
||||||
|
|
||||||
|
#' Build one plot per configuration and write them all out via save_plots().
#'
#' @param df Data frame passed to every plot.
#' @param output_dir Directory handed to save_plots().
#' @param file_prefix File name prefix handed to save_plots().
#' @param plot_configs List of lists. Each entry may carry `x_var`, `y_var`,
#'   `plot_type`, `title`, `ylim_vals` and an optional `annotations` list of
#'   `list(x, y, label)` text annotations.
#' @return The value of save_plots(), or NULL (invisibly) when `df` is empty.
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
  if (nrow(df) == 0) {
    message("The dataframe is empty, skipping plots")
    return(invisible(NULL))
  }

  message("Generating plots for dataframe")

  plots <- list()

  for (config in plot_configs) {
    # Generate the plot using the configurations
    plot <- generate_plot(
      df = df,
      x_var = config$x_var,
      y_var = config$y_var,
      plot_type = config$plot_type,
      title = config$title,
      ylim_vals = config$ylim_vals
    )

    # Add custom annotations if provided
    for (annotation in config$annotations) {
      plot <- plot +
        annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
    }

    # Key the plot by variable name and plot type. Several configs can share
    # both (e.g. the same y variable against different x variables), so
    # de-duplicate the key instead of silently overwriting the earlier plot.
    keys <- make.unique(
      c(names(plots), paste0(config$y_var, "_", config$plot_type)),
      sep = "_"
    )
    plots[[keys[length(keys)]]] <- plot
  }

  # Save plots to PDF and HTML
  save_plots(file_prefix, plots, output_dir)
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Ensure all plots are saved and printed to PDF
|
||||||
|
#' Write a list of plots to one PDF and to one interactive HTML file each.
#'
#' @param file_name Base file name (no extension) used for the PDF and as the
#'   prefix of every HTML file.
#' @param plot_list Named list of ggplot objects; the names become the HTML
#'   file suffixes.
#' @param output_dir Directory that receives the files.
#' @return The list produced by the per-plot HTML step.
save_plots <- function(file_name, plot_list, output_dir) {
  # Save all plots to a single PDF. The device is closed even when printing a
  # plot fails, so an error does not leave a dangling graphics device behind.
  pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
  tryCatch({
    for (plot in plot_list) {
      print(plot)
    }
  }, finally = dev.off())

  # Save each plot as an interactive HTML file
  lapply(names(plot_list), function(plot_name) {
    pgg <- tryCatch({
      suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
        layout(legend = list(orientation = "h")))
    }, error = function(e) {
      # Report why the conversion failed instead of only which plot failed
      message("Error generating plot: ", plot_name, ": ", conditionMessage(e))
      NULL
    })

    if (!is.null(pgg)) {
      saveWidget(pgg, file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")), selfcontained = TRUE)
    }
  })
}
|
||||||
|
|
||||||
|
|
||||||
interaction_plot_configs <- function(df, variable) {
|
interaction_plot_configs <- function(df, variable) {
|
||||||
@@ -361,102 +472,6 @@ interaction_plot_configs <- function(df, variable) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Build a single ggplot from column names supplied as strings.
#'
#' @param df Data frame to plot.
#' @param x_var String naming the x column.
#' @param y_var Optional string naming the y column.
#' @param plot_type "scatter", "box", "density" or "bar"; any other string
#'   leaves the base plot without a geom.
#' @param color_var String naming the column mapped (as a factor) to colour.
#' @param title Plot title.
#' @param x_label,y_label Optional axis labels.
#' @param ylim_vals Optional y limits passed to coord_cartesian().
#' @return A ggplot object.
generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num",
                          title, x_label = NULL, y_label = NULL, ylim_vals = NULL) {

  # Column names arrive as strings, so inject them as symbols
  p <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
  if (!is.null(y_var)) {
    p <- p + aes(y = !!sym(y_var))
  }

  # Pick the layers for the requested plot type; unknown types add nothing
  layers <- switch(plot_type,
    "scatter" = list(geom_point(), geom_smooth(method = "lm", se = FALSE)),
    "box" = list(geom_boxplot()),
    "density" = list(geom_density()),
    "bar" = list(geom_bar()),
    list()
  )
  p <- p + layers

  if (!is.null(ylim_vals)) {
    p <- p + coord_cartesian(ylim = ylim_vals)
  }

  # Title and theme always apply; axis labels only when supplied
  p <- p + ggtitle(title) + theme_publication()
  if (!is.null(x_label)) {
    p <- p + xlab(x_label)
  }
  if (!is.null(y_label)) {
    p <- p + ylab(y_label)
  }

  p
}
|
|
||||||
|
|
||||||
#' Build one plot per configuration and write them all out via save_plots().
#'
#' @param df Data frame passed to every plot; the expression the caller used
#'   for it is echoed in the progress messages.
#' @param output_dir Directory handed to save_plots().
#' @param file_prefix File name prefix handed to save_plots().
#' @param plot_configs List of lists with `x_var`, `y_var`, `plot_type`,
#'   `title`, `ylim_vals` and an optional `annotations` list of
#'   `list(x, y, label)` text annotations.
#' @return The value of save_plots(), or NULL (invisibly) when `df` is empty.
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
  # Caller-side expression for df, used only for messages
  df_label <- deparse(substitute(df))

  if (nrow(df) == 0) {
    message("The \"", df_label, "\" dataframe is empty, skipping plots")
    return(invisible(NULL))
  }

  message("Generating plots for \"", df_label, "\" dataframe")

  plots <- list()

  for (config in plot_configs) {
    plot <- generate_plot(
      df = df,
      x_var = config$x_var,
      y_var = config$y_var,
      plot_type = config$plot_type,
      title = config$title,
      ylim_vals = config$ylim_vals
    )

    # If custom annotations for interaction plots are required
    for (annotation in config$annotations) {
      plot <- plot +
        annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
    }

    # Key the plot by variable name and plot type, de-duplicating the key so
    # configs that share both do not silently overwrite an earlier plot.
    keys <- make.unique(
      c(names(plots), paste0(config$y_var, "_", config$plot_type)),
      sep = "_"
    )
    plots[[keys[length(keys)]]] <- plot
  }

  save_plots(file_prefix, plots, output_dir)
}
|
|
||||||
|
|
||||||
# Ensure all plots are saved and printed to PDF
|
|
||||||
#' Write a list of plots to one PDF and to one interactive HTML file each.
#'
#' @param file_name Base file name (no extension) used for the PDF and as the
#'   prefix of every HTML file.
#' @param plot_list Named list of ggplot objects; the names become the HTML
#'   file suffixes.
#' @param output_dir Directory that receives the files.
#' @return The list produced by the per-plot HTML step.
save_plots <- function(file_name, plot_list, output_dir) {
  # Save to PDF; close the device even if printing one of the plots fails
  pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
  tryCatch({
    for (plot in plot_list) {
      print(plot)
    }
  }, finally = dev.off())

  # Save to HTML with horizontal legend orientation
  lapply(names(plot_list), function(plot_name) {
    message("Generating plot: ", plot_name)
    pgg <- tryCatch({
      suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
        layout(legend = list(orientation = "h")))
    }, error = function(e) {
      # Use the condition's message text rather than the raw condition object
      message("Error in plot: ", plot_name, "\n", conditionMessage(e))
      NULL
    })

    if (!is.null(pgg)) {
      saveWidget(pgg,
        file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")),
        selfcontained = TRUE)
    }
  })
}
|
|
||||||
|
|
||||||
generate_interaction_plots <- function(df, output_file) {
|
generate_interaction_plots <- function(df, output_file) {
|
||||||
message("Generating interaction plots")
|
message("Generating interaction plots")
|
||||||
|
|
||||||
@@ -503,21 +518,27 @@ generate_interaction_plots <- function(df, output_file) {
|
|||||||
|
|
||||||
#' Scatter plots of pairwise interaction scores, annotated with R-squared.
#'
#' @param df_na_rm Data frame containing the variables used in the models.
#' @param lm_list List of fitted lm objects, one per plot.
#'   NOTE(review): assumes each model is a simple `response ~ predictor`
#'   regression and that the list order matches `plot_titles` -- confirm.
#' @param output_dir Directory that receives the plots.
#' @return The value of generate_and_save_plots().
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
  lm_summaries <- lapply(lm_list, summary)

  plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
    "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")

  # Define plot titles and annotation based on R-squared values from the lm_list
  plot_configs <- lapply(seq_along(lm_list), function(i) {
    r_squared <- round(lm_summaries[[i]]$r.squared, 3)

    # Take the variables from the model formula: the list is not guaranteed
    # to be named, and a name could not encode two variables anyway.
    model_vars <- all.vars(formula(lm_list[[i]]))

    list(
      x_var = model_vars[2], # predictor
      y_var = model_vars[1], # response
      plot_type = "scatter",
      title = plot_titles[i],
      annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
    )
  })

  # Generate and save the plots using the new system
  generate_and_save_plots(df_na_rm, output_dir, "Correlation_CPPs", plot_configs)
}
|
||||||
|
|
||||||
|
|
||||||
# Adjust missing values and calculate ranks
|
# Adjust missing values and calculate ranks
|
||||||
adjust_missing_and_rank <- function(df, variables) {
|
adjust_missing_and_rank <- function(df, variables) {
|
||||||
|
|
||||||
@@ -570,39 +591,38 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num",
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Function to generate rank plots for the provided dataframe
|
# Function to generate rank plots for the provided dataframe
|
||||||
#' Generate and save rank plots for L, K, r and AUC.
#'
#' For each variable, two scatter plots are built (average Z score vs rank and
#' interaction Z score vs rank) and saved together as "RankPlots_<variable>".
#'
#' @param df Data frame handed to adjust_missing_and_rank() before plotting.
#' @param output_dir Directory that receives the saved plots.
#' @return NULL, invisibly; called for its side effects.
create_ranked_plots <- function(df, output_dir) {
  rank_vars <- c("L", "K", "r", "AUC")

  # Adjust missing values and calculate ranks
  ranked_df <- adjust_missing_and_rank(df, rank_vars)

  # Both plots of a variable are scatter plots over the same data
  rank_scatter <- function(x_col, y_col, plot_title) {
    generate_plot(
      df = ranked_df,
      x_var = x_col,
      y_var = y_col,
      plot_type = "scatter",
      title = plot_title
    )
  }

  for (v in rank_vars) {
    avg_plot <- rank_scatter(
      paste0(v, "_Rank"),
      paste0("Avg_Zscore_", v),
      paste("Average Z score vs Rank for", v)
    )
    lm_plot <- rank_scatter(
      paste0(v, "_Rank_lm"),
      paste0("Z_lm_", v),
      paste("Interaction Z score vs Rank for", v)
    )

    # Save the plots for Avg_Zscore and Z_lm
    save_plots(
      paste0("RankPlots_", v),
      list(plot_rank_avg = avg_plot, plot_rank_lm = lm_plot),
      output_dir
    )
  }

  invisible(NULL)
}
|
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
@@ -622,8 +642,6 @@ main <- function() {
|
|||||||
|
|
||||||
# QC steps and filtering
|
# QC steps and filtering
|
||||||
df_above_tolerance <- df %>% filter(DB == 1)
|
df_above_tolerance <- df %>% filter(DB == 1)
|
||||||
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
|
|
||||||
df_no_zeros <- df_na %>% filter(L > 0)
|
|
||||||
|
|
||||||
# Calculate the half-medians for `L` and `K` for rows above tolerance
|
# Calculate the half-medians for `L` and `K` for rows above tolerance
|
||||||
L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||||
@@ -632,7 +650,24 @@ main <- function() {
|
|||||||
# Get the number of rows that are above tolerance
|
# Get the number of rows that are above tolerance
|
||||||
rows_to_remove <- nrow(df_above_tolerance)
|
rows_to_remove <- nrow(df_above_tolerance)
|
||||||
|
|
||||||
# Additional filtering for non-finite values in df_na
|
# Set L, r, K, and AUC to NA for rows that are above tolerance
|
||||||
|
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
|
||||||
|
|
||||||
|
# Calculate summary statistics for all strains, including both background and the deletions
|
||||||
|
message("Calculating summary statistics for all strains")
|
||||||
|
variables <- c("L", "K", "r", "AUC")
|
||||||
|
ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||||
|
summary_stats <- ss$summary_stats
|
||||||
|
df_na_stats <- ss$df_with_stats
|
||||||
|
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
print("Summary stats:")
|
||||||
|
print(head(summary_stats), width = 200)
|
||||||
|
|
||||||
|
# Remove rows with 0 values in L
|
||||||
|
df_no_zeros <- df_na %>% filter(L > 0)
|
||||||
|
|
||||||
|
# Additional filtering for non-finite values
|
||||||
df_na_filtered <- df_na %>%
|
df_na_filtered <- df_na %>%
|
||||||
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
||||||
{
|
{
|
||||||
@@ -647,7 +682,7 @@ main <- function() {
|
|||||||
qc_plot_configs <- list(
|
qc_plot_configs <- list(
|
||||||
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL),
|
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL),
|
||||||
list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL),
|
list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL),
|
||||||
list(x_var = "delta_bg", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
|
list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
|
||||||
)
|
)
|
||||||
|
|
||||||
above_tolerance_plot_configs <- list(
|
above_tolerance_plot_configs <- list(
|
||||||
@@ -659,7 +694,16 @@ main <- function() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
no_zeros_plot_configs <- list(
|
no_zeros_plot_configs <- list(
|
||||||
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Non-Zero L Values", ylim_vals = NULL)
|
list(x_var = "scan", y_var = "L", plot_type = "scatter", title = "L vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "scan", y_var = "K", plot_type = "scatter", title = "K vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "scan", y_var = "r", plot_type = "scatter", title = "r vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "scan", y_var = "AUC", plot_type = "scatter", title = "AUC vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter", title = "Delta background vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box", title = "L vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box", title = "K vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box", title = "r vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box", title = "AUC vs Scan for Non-Zero L Values"),
|
||||||
|
list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box", title = "Delta background vs Scan for Non-Zero L Values")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate and save plots for each QC step
|
# Generate and save plots for each QC step
|
||||||
@@ -672,17 +716,6 @@ main <- function() {
|
|||||||
# Clean up
|
# Clean up
|
||||||
rm(df, df_above_tolerance, df_no_zeros)
|
rm(df, df_above_tolerance, df_no_zeros)
|
||||||
|
|
||||||
# Calculate summary statistics
|
|
||||||
message("Calculating summary statistics for all strains")
|
|
||||||
variables <- c("L", "K", "r", "AUC")
|
|
||||||
ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
|
||||||
summary_stats <- ss$summary_stats
|
|
||||||
df_na_stats <- ss$df_with_stats
|
|
||||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
print("Summary stats:")
|
|
||||||
print(head(summary_stats), width = 200)
|
|
||||||
|
|
||||||
# TODO: Originally this filtered L NA's
|
# TODO: Originally this filtered L NA's
|
||||||
# Let's try to avoid for now since stats have already been calculated
|
# Let's try to avoid for now since stats have already been calculated
|
||||||
|
|
||||||
@@ -695,6 +728,7 @@ main <- function() {
|
|||||||
|
|
||||||
# Summary statistics for within and outside 2SD of K
|
# Summary statistics for within and outside 2SD of K
|
||||||
message("Calculating summary statistics for L within 2SD of K")
|
message("Calculating summary statistics for L within 2SD of K")
|
||||||
|
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||||
l_within_2sd_k_stats <- ss$summary_stats
|
l_within_2sd_k_stats <- ss$summary_stats
|
||||||
df_na_l_within_2sd_k_stats <- ss$df_with_stats
|
df_na_l_within_2sd_k_stats <- ss$df_with_stats
|
||||||
@@ -844,8 +878,7 @@ main <- function() {
|
|||||||
zscores_interactions_filtered <- zscores_interactions %>%
|
zscores_interactions_filtered <- zscores_interactions %>%
|
||||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
||||||
|
|
||||||
# Generate summary and correlation plots
|
|
||||||
generate_summary_plots(df, out_dir)
|
|
||||||
lm_list <- list(
|
lm_list <- list(
|
||||||
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||||
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||||
@@ -856,10 +889,37 @@ main <- function() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Generate cpp correlation plots
|
# Generate cpp correlation plots
|
||||||
generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
|
correlation_plot_config <- list(
|
||||||
|
list(x_var = "Z_lm_L", y_var = "Z_lm_K", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_K",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||||
|
list(x_var = "Z_lm_L", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_r",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||||
|
list(x_var = "Z_lm_L", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_AUC",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||||
|
list(x_var = "Z_lm_K", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_r",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||||
|
list(x_var = "Z_lm_K", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_AUC",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||||
|
list(x_var = "Z_lm_r", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_r and Z_lm_AUC",
|
||||||
|
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), )
|
||||||
|
)
|
||||||
|
generate_and_save_plots(zscores_interactions_filtered, output_dir, "CorrelationPlots", correlation_plot_config)
|
||||||
|
|
||||||
|
# Generate cpp correlation plots
|
||||||
|
#generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
|
||||||
|
|
||||||
# Generate ranked plots
|
# Generate ranked plots
|
||||||
create_ranked_plots(zscores_interactions_filtered, out_dir)
|
rank_plot_config <- list(
|
||||||
|
list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
|
||||||
|
list(x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "scatter", title = "Rank vs Avg Z score for K"),
|
||||||
|
list(x_var = "r_Rank", y_var = "Avg_Zscore_r", plot_type = "scatter", title = "Rank vs Avg Z score for r"),
|
||||||
|
list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC")
|
||||||
|
)
|
||||||
|
# Generate and save rank plots using the existing plotting framework
|
||||||
|
generate_and_save_plots(zscores_interactions_filtered, output_dir, "RankPlots", rank_plot_config)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user