Break out plot filtering

This commit is contained in:
2024-09-16 13:55:00 -04:00
parent 4f90330500
commit e07baf7a37
2 changed files with 94 additions and 125 deletions

View File

@@ -1,11 +1,12 @@
suppressMessages({ suppressMessages({
library(ggplot2) library("ggplot2")
library(plotly) library("plotly")
library(htmlwidgets) library("htmlwidgets")
library(dplyr) library("dplyr")
library(ggthemes) library("rlang")
library(data.table) library("ggthemes")
library(unix) library("data.table")
library("unix")
}) })
options(warn = 2) options(warn = 2)
@@ -568,13 +569,9 @@ generate_box_plot <- function(plot, config) {
} }
generate_interaction_plot_configs <- function(df, variables) { generate_interaction_plot_configs <- function(df, variables) {
configs <- list() configs <- list()
# Data frames to collect filtered data and out-of-range data
filtered_data_list <- list()
out_of_range_data_list <- list()
# Define common y-limits for each variable
limits_map <- list( limits_map <- list(
L = c(-65, 65), L = c(-65, 65),
K = c(-65, 65), K = c(-65, 65),
@@ -597,43 +594,21 @@ generate_interaction_plot_configs <- function(df, variables) {
SM = function(df, var) paste("SM =", df$SM) SM = function(df, var) paste("SM =", df$SM)
) )
results <- filter_data_for_plots(df, variables, limits_map)
df_filtered <- results$df_filtered
lm_lines <- filtered_results$lm_lines
# Iterate over each variable to create plot configurations
for (variable in variables) { for (variable in variables) {
# Get y-limits for the variable
ylim_vals <- limits_map[[variable]]
# Extract precomputed linear model coefficients
lm_line <- list(
intercept = df[[paste0("lm_intercept_", variable)]],
slope = df[[paste0("lm_slope_", variable)]]
)
# Filter the data based on y-limits and missing values
y_var_sym <- sym(variable)
x_var_sym <- sym("conc_num_factor")
# Identify missing data and out-of-range data
missing_data <- df %>% filter(is.na(!!x_var_sym) | is.na(!!y_var_sym))
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
# Combine missing data and out-of-range data
data_to_filter_out <- bind_rows(missing_data, out_of_range_data) %>% distinct()
# Filtered data for plotting
filtered_data <- df %>% anti_join(data_to_filter_out, by = names(df))
# Collect the filtered data and out-of-range data
filtered_data_list[[variable]] <- filtered_data
out_of_range_data_list[[variable]] <- data_to_filter_out
# Calculate x and y positions for annotations based on filtered data # Calculate x and y positions for annotations based on filtered data
x_levels <- levels(filtered_data$conc_num_factor) x_levels <- levels(df_filtered$conc_num_factor)
x_pos <- mean(seq_along(x_levels)) # Midpoint of x-axis num_levels <- length(x_levels)
x_pos <- (1 + num_levels) / 2 # Midpoint of x-axis positions
y_min <- min(ylim_vals) y_range <- limits_map[[variable]]
y_max <- max(ylim_vals) y_min <- min(y_range)
y_max <- max(y_range)
y_span <- y_max - y_min y_span <- y_max - y_min
# Adjust y positions as fractions of y-span # Adjust y positions as fractions of y-span
@@ -650,7 +625,7 @@ generate_interaction_plot_configs <- function(df, variables) {
y_pos <- annotation_positions[[annotation_name]] y_pos <- annotation_positions[[annotation_name]]
label_func <- annotation_labels[[annotation_name]] label_func <- annotation_labels[[annotation_name]]
if (!is.null(label_func)) { if (!is.null(label_func)) {
label <- label_func(df, variable) label <- label_func(df_filtered, variable)
list(x = x_pos, y = y_pos, label = label) list(x = x_pos, y = y_pos, label = label)
} else { } else {
message(paste("Warning: No annotation function found for", annotation_name)) message(paste("Warning: No annotation function found for", annotation_name))
@@ -663,48 +638,40 @@ generate_interaction_plot_configs <- function(df, variables) {
# Create scatter plot config # Create scatter plot config
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = filtered_data, df = df_filtered,
x_var = "conc_num_factor", x_var = "conc_num_factor",
y_var = variable, y_var = variable,
plot_type = "scatter", plot_type = "scatter",
title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]), title = sprintf("%s %s", df_filtered$OrfRep[1], df_filteredGene[1]),
ylim_vals = ylim_vals, ylim_vals = y_range,
annotations = annotations, annotations = annotations,
lm_line = lm_line, lm_line = lm_lines[[variable]],
error_bar = TRUE, error_bar = TRUE,
x_breaks = levels(filtered_data$conc_num_factor), x_breaks = levels(df_filtered$conc_num_factor),
x_labels = levels(filtered_data$conc_num_factor), x_labels = levels(df_filtered$conc_num_factor),
x_label = unique(df$Drug[1]), x_label = unique(df$Drug[1]),
position = "jitter", position = "jitter",
coord_cartesian = ylim_vals # Use the actual y-limits coord_cartesian = y_range # Use the actual y-limits
) )
# Create box plot config # Create box plot config
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = filtered_data, df = df_filtered,
x_var = "conc_num_factor", x_var = "conc_num_factor",
y_var = variable, y_var = variable,
plot_type = "box", plot_type = "box",
title = sprintf("%s %s (Boxplot)", df$OrfRep[1], df$Gene[1]), title = sprintf("%s %s (Boxplot)", df_filtered$OrfRep[1], df_filtered$Gene[1]),
ylim_vals = ylim_vals, ylim_vals = y_range,
annotations = annotations, annotations = annotations,
error_bar = FALSE, error_bar = FALSE,
x_breaks = unique(filtered_data$conc_num_factor), x_breaks = levels(df_filtered$conc_num_factor),
x_labels = unique(as.character(filtered_data$conc_num)), x_labels = levels(df_filtered$conc_num_factor),
x_label = unique(df$Drug[1]), x_label = unique(df_filtered$Drug[1]),
coord_cartesian = ylim_vals coord_cartesian = y_range
) )
} }
# Combine the filtered data and out-of-range data into data frames return(configs)
filtered_data_df <- bind_rows(filtered_data_list, .id = "variable")
out_of_range_data_df <- bind_rows(out_of_range_data_list, .id = "variable")
return(list(
configs = configs,
filtered_data = filtered_data_df,
out_of_range_data = out_of_range_data_df
))
} }
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
@@ -822,6 +789,54 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
df %>% filter(if_all(all_of(vars_to_check), is.finite)) df %>% filter(if_all(all_of(vars_to_check), is.finite))
} }
filter_data_for_plots <- function(df, variables, limits_map) {
# Initialize lists to store lm lines and filtered data
lm_lines <- list()
# Print out NA and out-of-range data separately
for (variable in variables) {
# Get y-limits for the variable
ylim_vals <- limits_map[[variable]]
# Extract precomputed linear model coefficients
lm_lines[[variable]] <- list(
intercept = df[[paste0("lm_intercept_", variable)]],
slope = df[[paste0("lm_slope_", variable)]]
)
# Convert variable name to symbol for dplyr
y_var_sym <- sym(variable)
# Identify missing data and print it
missing_data <- df %>% filter(is.na(!!y_var_sym))
if (nrow(missing_data) > 0) {
message("Missing data for variable ", variable, ":")
print(missing_data)
}
# Identify out-of-range data and print it
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
if (nrow(out_of_range_data) > 0) {
message("Out-of-range data for variable ", variable, ":")
print(out_of_range_data)
}
}
# Perform all filtering at once for all variables
df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>%
filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}"))
# Return the filtered dataframe and lm lines
return(list(
df_filtered = df_filtered,
lm_lines = lm_lines
))
}
main <- function() { main <- function() {
lapply(names(args$experiments), function(exp_name) { lapply(names(args$experiments), function(exp_name) {
exp <- args$experiments[[exp_name]] exp <- args$experiments[[exp_name]]
@@ -1151,22 +1166,11 @@ main <- function() {
# Create interaction plots # Create interaction plots
message("Generating reference interaction plots") message("Generating reference interaction plots")
results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars) reference_plot_configs <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
if (nrow(results$out_of_range_data) > 0) {
message("Out-of-range data:")
print(results$out_of_range_data %>% select("OrfRep", "Gene", "num",
"conc_num", "conc_num_factor", config$x_var, config$y_var))
}
reference_plot_configs <- results$configs
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
message("Generating deletion interaction plots") message("Generating deletion interaction plots")
results <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars) deletion_plot_configs <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
if (nrow(results$out_of_range_data) > 0) {
message("Out-of-range data:")
print(results$out_of_range_data)
}
deletion_plot_configs <- results$configs
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
# Define conditions for enhancers and suppressors # Define conditions for enhancers and suppressors
@@ -1253,10 +1257,10 @@ main <- function() {
ungroup() %>% ungroup() %>%
rowwise() %>% rowwise() %>%
mutate( mutate(
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared, lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA,
Overlap = case_when( Overlap = case_when(
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",

View File

@@ -614,41 +614,6 @@ interactive_header() {
module install_dependencies module install_dependencies
# @description This module will automatically install the dependencies for running QHTCP. # @description This module will automatically install the dependencies for running QHTCP.
# #
# If you wish to install them manually, you can use the following information to do so:
#
# #### System dependencies
#
# * R
# * Perl
# * Java
# * MATLAB
#
# #### MacOS
#
# * `export HOMEBREW_BREW_GIT_REMOTE=https://github.com/Homebrew/brew`
# * `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"`
# * `cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder`
# * `brew install graphiz gd pdftk-java pandoc shdoc nano rsync coreutils`
#
# #### Linux DEB
#
# * `apt install graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync coreutils libcurl-dev openssl-dev`
#
# #### Linux RPM
#
# * `dnf install graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano rsync coreutils libcurl-devel openssl-devel`
#
# #### Perl
#
# * `cpan -I -i File::Map ExtUtils::PkgConfig GD GO::TermFinder`
#
# #### R
#
# * `install.packages(c('BiocManager', 'ontologyIndex', 'ggrepel', 'tidyverse', 'sos', 'openxlsx', 'ggplot2', 'plyr', 'extrafont', 'gridExtra', 'gplots', 'stringr', 'plotly', 'ggthemes', 'pandoc', 'rmarkdown', 'plotly', 'htmlwidgets'), dep=TRUE)`
# * `BiocManager::install('UCSC.utils')`
# * `BiocManager::install('org.Sc.sgd.db')`
#
#
install_dependencies() { install_dependencies() {
debug "Running: ${FUNCNAME[0]} $*" debug "Running: ${FUNCNAME[0]} $*"
@@ -669,8 +634,8 @@ install_dependencies() {
ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder) ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder)
depends_r=( depends_r=(
BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
rmarkdown plotly htmlwidgets gplots gdata Hmisc) rmarkdown htmlwidgets gdata Hmisc)
depends_bioc=(UCSC.utils org.Sc.sgd.db) depends_bioc=(UCSC.utils org.Sc.sgd.db)
[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars