Before old rank plot removal
This commit is contained in:
@@ -9,6 +9,7 @@ suppressMessages({
|
||||
})
|
||||
|
||||
options(warn = 2, max.print = 1000)
|
||||
options(width = 10000)
|
||||
|
||||
# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
|
||||
soft_limit <- 30 * 1024 * 1024 * 1024
|
||||
@@ -156,7 +157,7 @@ update_gene_names <- function(df, sgd_gene_list) {
|
||||
return(df)
|
||||
}
|
||||
|
||||
# Process strains (deletion and reference)
|
||||
# Process either deletion and or reference strain(s)
|
||||
process_strains <- function(df) {
|
||||
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
||||
|
||||
@@ -187,6 +188,9 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
df <- df %>%
|
||||
mutate(across(all_of(variables), ~ifelse(. == 0, NA, .)))
|
||||
|
||||
print("Head of df for summary stats calculations:")
|
||||
print(head(df))
|
||||
|
||||
# Calculate summary statistics, including a single N based on L
|
||||
summary_stats <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
@@ -195,13 +199,16 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
across(all_of(variables), list(
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
||||
sd = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE), 0), # If N == 1, sd is set to 0
|
||||
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), 0) # If N == 1, se is set to 0
|
||||
max = ~max(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1) # TODO unsure why this is - 1
|
||||
), .names = "{.fn}_{.col}")
|
||||
)
|
||||
|
||||
print("Summary stats:")
|
||||
print(head(summary_stats))
|
||||
|
||||
# Remove existing stats columns from df if they already exist
|
||||
stat_columns <- setdiff(names(summary_stats), group_vars)
|
||||
df_cleaned <- df %>% select(-any_of(stat_columns))
|
||||
@@ -215,10 +222,10 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
# Calculate interaction scores
|
||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||
|
||||
if (nrow(df) == 0) {
|
||||
message("Dataframe is empty after filtering")
|
||||
return(NULL) # Or handle the empty dataframe case as needed
|
||||
}
|
||||
# if (nrow(df) == 0) {
|
||||
# message("Dataframe is empty after filtering")
|
||||
# return(NULL) # Or handle the empty dataframe case as needed
|
||||
# }
|
||||
|
||||
# Calculate total concentration variables
|
||||
total_conc_num <- length(unique(df$conc_num))
|
||||
@@ -304,6 +311,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
print("Interaction scores:")
|
||||
print(head(interaction_scores))
|
||||
|
||||
# Calculate linear models and interaction scores per gene
|
||||
print("Calculating interaction scores part 2")
|
||||
interaction_scores_all <- interaction_scores %>%
|
||||
@@ -329,6 +339,107 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
||||
}
|
||||
|
||||
|
||||
generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_var = "conc_num",
|
||||
title = "", x_label = NULL, y_label = NULL, ylim_vals = NULL) {
|
||||
|
||||
# Initialize plot with dynamic x and color variables
|
||||
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
|
||||
|
||||
if (!is.null(y_var)) {
|
||||
plot <- plot + aes(y = !!sym(y_var))
|
||||
|
||||
# Add scatter points with summary stats if `y_var` is present
|
||||
y_mean_col <- paste0("mean_", y_var)
|
||||
y_sd_col <- paste0("sd_", y_var)
|
||||
|
||||
plot <- plot + geom_point(shape = 3, size = 0.2) +
|
||||
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
|
||||
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
|
||||
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
|
||||
}
|
||||
|
||||
# Add the required plot type (scatter, box, density, bar)
|
||||
plot <- switch(plot_type,
|
||||
"box" = plot + geom_boxplot(),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(stat = "identity"),
|
||||
plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
|
||||
)
|
||||
|
||||
# Apply y-axis limits if specified
|
||||
if (!is.null(ylim_vals)) {
|
||||
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
||||
}
|
||||
|
||||
# Add title and axis labels
|
||||
plot <- plot + ggtitle(title) + theme_publication()
|
||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||
|
||||
return(plot)
|
||||
}
|
||||
|
||||
|
||||
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
||||
plots <- list()
|
||||
|
||||
if (nrow(df) == 0) {
|
||||
message("The dataframe is empty, skipping plots")
|
||||
return()
|
||||
}
|
||||
|
||||
message("Generating plots for dataframe")
|
||||
|
||||
for (config in plot_configs) {
|
||||
# Generate the plot using the configurations
|
||||
plot <- generate_plot(
|
||||
df = df,
|
||||
x_var = config$x_var,
|
||||
y_var = config$y_var,
|
||||
plot_type = config$plot_type,
|
||||
title = config$title,
|
||||
ylim_vals = config$ylim_vals
|
||||
)
|
||||
|
||||
# Add custom annotations if provided
|
||||
if (!is.null(config$annotations)) {
|
||||
for (annotation in config$annotations) {
|
||||
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
|
||||
}
|
||||
}
|
||||
|
||||
# Store the plot with variable name and plot type
|
||||
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
|
||||
}
|
||||
|
||||
# Save plots to PDF and HTML
|
||||
save_plots(file_prefix, plots, output_dir)
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
save_plots <- function(file_name, plot_list, output_dir) {
|
||||
# Save all plots to a single PDF
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
lapply(plot_list, print)
|
||||
dev.off()
|
||||
|
||||
# Save each plot as an interactive HTML file
|
||||
lapply(names(plot_list), function(plot_name) {
|
||||
pgg <- tryCatch({
|
||||
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
||||
layout(legend = list(orientation = "h")))
|
||||
}, error = function(e) {
|
||||
message("Error generating plot: ", plot_name)
|
||||
return(NULL)
|
||||
})
|
||||
|
||||
if (!is.null(pgg)) {
|
||||
saveWidget(pgg, file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")), selfcontained = TRUE)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
interaction_plot_configs <- function(df, variable) {
|
||||
@@ -361,102 +472,6 @@ interaction_plot_configs <- function(df, variable) {
|
||||
)
|
||||
}
|
||||
|
||||
generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num",
|
||||
title, x_label = NULL, y_label = NULL, ylim_vals = NULL) {
|
||||
|
||||
# Use tidy evaluation with aes() and !!sym() for dynamic column names
|
||||
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
|
||||
|
||||
if (!is.null(y_var)) {
|
||||
plot <- plot + aes(y = !!sym(y_var))
|
||||
}
|
||||
|
||||
# Set up the plot based on the requested plot type
|
||||
plot <- switch(plot_type,
|
||||
"scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE),
|
||||
"box" = plot + geom_boxplot(),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(),
|
||||
plot # Default: return the plot as is
|
||||
)
|
||||
|
||||
if (!is.null(ylim_vals)) {
|
||||
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
||||
}
|
||||
|
||||
# Add titles and labels if available
|
||||
plot <- plot + ggtitle(title) + theme_publication()
|
||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||
|
||||
return(plot)
|
||||
}
|
||||
|
||||
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
|
||||
plots <- list()
|
||||
|
||||
if (nrow(df) == 0) {
|
||||
message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
|
||||
return()
|
||||
}
|
||||
|
||||
message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
|
||||
|
||||
for (config in plot_configs) {
|
||||
plot <- generate_plot(
|
||||
df = df,
|
||||
x_var = config$x_var,
|
||||
y_var = config$y_var,
|
||||
plot_type = config$plot_type,
|
||||
title = config$title,
|
||||
ylim_vals = config$ylim_vals
|
||||
)
|
||||
|
||||
# If custom annotations for interaction plots are required
|
||||
if (!is.null(config$annotations)) {
|
||||
for (annotation in config$annotations) {
|
||||
plot <- plot +
|
||||
annotate(
|
||||
"text", x = annotation$x, y = annotation$y, label = annotation$label
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
# Store the plot with the variable name and plot type
|
||||
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
|
||||
}
|
||||
|
||||
save_plots(file_prefix, plots, output_dir)
|
||||
}
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
save_plots <- function(file_name, plot_list, output_dir) {
|
||||
# Save to PDF
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
lapply(plot_list, function(plot) {
|
||||
print(plot)
|
||||
})
|
||||
dev.off()
|
||||
|
||||
# Save to HTML with horizontal legend orientation
|
||||
lapply(names(plot_list), function(plot_name) {
|
||||
message("Generating plot: ", plot_name)
|
||||
pgg <- tryCatch({
|
||||
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
||||
layout(legend = list(orientation = "h")))
|
||||
}, error = function(e) {
|
||||
message("Error in plot: ", plot_name, "\n", e)
|
||||
return(NULL)
|
||||
})
|
||||
if (!is.null(pgg)) {
|
||||
saveWidget(pgg,
|
||||
file = file.path(output_dir,
|
||||
paste0(file_name, "_", plot_name, ".html")),
|
||||
selfcontained = TRUE)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
generate_interaction_plots <- function(df, output_file) {
|
||||
message("Generating interaction plots")
|
||||
|
||||
@@ -503,21 +518,27 @@ generate_interaction_plots <- function(df, output_file) {
|
||||
|
||||
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
|
||||
lm_summaries <- lapply(lm_list, summary)
|
||||
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
|
||||
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
|
||||
|
||||
plot_list <- lapply(seq_along(lm_list), function(i) {
|
||||
ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) +
|
||||
geom_point(shape = 3, color = "gray70") +
|
||||
geom_smooth(method = "lm", color = "tomato3") +
|
||||
ggtitle(plot_titles[i]) +
|
||||
annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[i]]$r.squared, 3))) +
|
||||
theme_Publication_legend_right()
|
||||
# Define plot titles and annotation based on R-squared values from the lm_list
|
||||
plot_configs <- lapply(seq_along(lm_list), function(i) {
|
||||
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
|
||||
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
|
||||
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
|
||||
|
||||
list(
|
||||
x_var = names(lm_list)[i][1],
|
||||
y_var = names(lm_list)[i][2],
|
||||
plot_type = "scatter",
|
||||
title = plot_titles[i],
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
|
||||
)
|
||||
})
|
||||
|
||||
save_plots("Correlation_CPPs", plot_list, output_dir)
|
||||
# Generate and save the plots using the new system
|
||||
generate_and_save_plots(df_na_rm, output_dir, "Correlation_CPPs", plot_configs)
|
||||
}
|
||||
|
||||
|
||||
# Adjust missing values and calculate ranks
|
||||
adjust_missing_and_rank <- function(df, variables) {
|
||||
|
||||
@@ -570,39 +591,38 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num",
|
||||
}
|
||||
|
||||
# Function to generate rank plots for the provided dataframe
|
||||
create_ranked_plots <- function(df, output_dir) {
|
||||
# create_ranked_plots <- function(df, output_dir) {
|
||||
|
||||
# List of variables for which we need to generate rank plots
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
# # List of variables for which we need to generate rank plots
|
||||
# variables <- c("L", "K", "r", "AUC")
|
||||
|
||||
# Adjust missing values and calculate ranks
|
||||
df_adjusted <- adjust_missing_and_rank(df, variables)
|
||||
# # Adjust missing values and calculate ranks
|
||||
# df_adjusted <- adjust_missing_and_rank(df, variables)
|
||||
|
||||
# Generate rank plots for Avg_Zscore and Z_lm
|
||||
for (var in variables) {
|
||||
plot_rank_avg <- generate_plot(
|
||||
df = df_adjusted,
|
||||
x_var = paste0(var, "_Rank"),
|
||||
y_var = paste0("Avg_Zscore_", var),
|
||||
plot_type = "scatter",
|
||||
title = paste("Average Z score vs Rank for", var)
|
||||
)
|
||||
# # Generate rank plots
|
||||
# for (var in variables) {
|
||||
# plot_rank_avg <- generate_plot(
|
||||
# df = df_adjusted,
|
||||
# x_var = paste0(var, "_Rank"),
|
||||
# y_var = paste0("Avg_Zscore_", var),
|
||||
# plot_type = "scatter",
|
||||
# title = paste("Average Z score vs Rank for", var)
|
||||
# )
|
||||
|
||||
plot_rank_lm <- generate_plot(
|
||||
df = df_adjusted,
|
||||
x_var = paste0(var, "_Rank_lm"),
|
||||
y_var = paste0("Z_lm_", var),
|
||||
plot_type = "scatter",
|
||||
title = paste("Interaction Z score vs Rank for", var)
|
||||
)
|
||||
# plot_rank_lm <- generate_plot(
|
||||
# df = df_adjusted,
|
||||
# x_var = paste0(var, "_Rank_lm"),
|
||||
# y_var = paste0("Z_lm_", var),
|
||||
# plot_type = "scatter",
|
||||
# title = paste("Interaction Z score vs Rank for", var)
|
||||
# )
|
||||
|
||||
# Save the plots for Avg_Zscore and Z_lm
|
||||
save_plots(paste0("RankPlots_", var), list(
|
||||
plot_rank_avg = plot_rank_avg,
|
||||
plot_rank_lm = plot_rank_lm
|
||||
), output_dir)
|
||||
}
|
||||
}
|
||||
# save_plots(paste0("RankPlots_", var), list(
|
||||
# plot_rank_avg = plot_rank_avg,
|
||||
# plot_rank_lm = plot_rank_lm
|
||||
# ), output_dir)
|
||||
# }
|
||||
# }
|
||||
|
||||
main <- function() {
|
||||
lapply(names(args$experiments), function(exp_name) {
|
||||
@@ -622,8 +642,6 @@ main <- function() {
|
||||
|
||||
# QC steps and filtering
|
||||
df_above_tolerance <- df %>% filter(DB == 1)
|
||||
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Calculate the half-medians for `L` and `K` for rows above tolerance
|
||||
L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||
@@ -632,7 +650,24 @@ main <- function() {
|
||||
# Get the number of rows that are above tolerance
|
||||
rows_to_remove <- nrow(df_above_tolerance)
|
||||
|
||||
# Additional filtering for non-finite values in df_na
|
||||
# Set L, r, K, and AUC to NA for rows that are above tolerance
|
||||
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
|
||||
|
||||
# Calculate summary statistics for all strains, including both background and the deletions
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
summary_stats <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
|
||||
print("Summary stats:")
|
||||
print(head(summary_stats), width = 200)
|
||||
|
||||
# Remove rows with 0 values in L
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Additional filtering for non-finite values
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
||||
{
|
||||
@@ -647,7 +682,7 @@ main <- function() {
|
||||
qc_plot_configs <- list(
|
||||
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL),
|
||||
list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL),
|
||||
list(x_var = "delta_bg", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
|
||||
list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
|
||||
)
|
||||
|
||||
above_tolerance_plot_configs <- list(
|
||||
@@ -659,7 +694,16 @@ main <- function() {
|
||||
)
|
||||
|
||||
no_zeros_plot_configs <- list(
|
||||
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Non-Zero L Values", ylim_vals = NULL)
|
||||
list(x_var = "scan", y_var = "L", plot_type = "scatter", title = "L vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "scan", y_var = "K", plot_type = "scatter", title = "K vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "scan", y_var = "r", plot_type = "scatter", title = "r vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "scan", y_var = "AUC", plot_type = "scatter", title = "AUC vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter", title = "Delta background vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box", title = "L vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box", title = "K vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box", title = "r vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box", title = "AUC vs Scan for Non-Zero L Values"),
|
||||
list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box", title = "Delta background vs Scan for Non-Zero L Values")
|
||||
)
|
||||
|
||||
# Generate and save plots for each QC step
|
||||
@@ -672,17 +716,6 @@ main <- function() {
|
||||
# Clean up
|
||||
rm(df, df_above_tolerance, df_no_zeros)
|
||||
|
||||
# Calculate summary statistics
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
summary_stats <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
|
||||
print("Summary stats:")
|
||||
print(head(summary_stats), width = 200)
|
||||
|
||||
# TODO: Originally this filtered L NA's
|
||||
# Let's try to avoid for now since stats have already been calculated
|
||||
|
||||
@@ -695,6 +728,7 @@ main <- function() {
|
||||
|
||||
# Summary statistics for within and outside 2SD of K
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
l_within_2sd_k_stats <- ss$summary_stats
|
||||
df_na_l_within_2sd_k_stats <- ss$df_with_stats
|
||||
@@ -844,8 +878,7 @@ main <- function() {
|
||||
zscores_interactions_filtered <- zscores_interactions %>%
|
||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
||||
|
||||
# Generate summary and correlation plots
|
||||
generate_summary_plots(df, out_dir)
|
||||
|
||||
lm_list <- list(
|
||||
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||
@@ -856,10 +889,37 @@ main <- function() {
|
||||
)
|
||||
|
||||
# Generate cpp correlation plots
|
||||
generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
|
||||
correlation_plot_config <- list(
|
||||
list(x_var = "Z_lm_L", y_var = "Z_lm_K", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_K",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||
list(x_var = "Z_lm_L", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_r",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||
list(x_var = "Z_lm_L", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_L and Z_lm_AUC",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||
list(x_var = "Z_lm_K", y_var = "Z_lm_r", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_r",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||
list(x_var = "Z_lm_K", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_K and Z_lm_AUC",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), ),
|
||||
list(x_var = "Z_lm_r", y_var = "Z_lm_AUC", plot_type = "scatter", title = "Correlation between Z_lm_r and Z_lm_AUC",
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", round(lm_summaries[[1]]$r.squared, 3)))), )
|
||||
)
|
||||
generate_and_save_plots(zscores_interactions_filtered, output_dir, "CorrelationPlots", correlation_plot_config)
|
||||
|
||||
# Generate cpp correlation plots
|
||||
#generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
|
||||
|
||||
# Generate ranked plots
|
||||
create_ranked_plots(zscores_interactions_filtered, out_dir)
|
||||
rank_plot_config <- list(
|
||||
list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
|
||||
list(x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "scatter", title = "Rank vs Avg Z score for K"),
|
||||
list(x_var = "r_Rank", y_var = "Avg_Zscore_r", plot_type = "scatter", title = "Rank vs Avg Z score for r"),
|
||||
list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC")
|
||||
)
|
||||
# Generate and save rank plots using the existing plotting framework
|
||||
generate_and_save_plots(zscores_interactions_filtered, output_dir, "RankPlots", rank_plot_config)
|
||||
|
||||
|
||||
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user