Break out plot filtering

This commit is contained in:
2024-09-16 13:55:00 -04:00
parent 4f90330500
commit e07baf7a37
2 changed files with 94 additions and 125 deletions

View File

@@ -1,11 +1,12 @@
suppressMessages({
library(ggplot2)
library(plotly)
library(htmlwidgets)
library(dplyr)
library(ggthemes)
library(data.table)
library(unix)
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("dplyr")
library("rlang")
library("ggthemes")
library("data.table")
library("unix")
})
options(warn = 2)
@@ -568,13 +569,9 @@ generate_box_plot <- function(plot, config) {
}
generate_interaction_plot_configs <- function(df, variables) {
configs <- list()
# Data frames to collect filtered data and out-of-range data
filtered_data_list <- list()
out_of_range_data_list <- list()
# Define common y-limits for each variable
limits_map <- list(
L = c(-65, 65),
K = c(-65, 65),
@@ -596,44 +593,22 @@ generate_interaction_plot_configs <- function(df, variables) {
DB = function(df, var) paste("DB =", df$DB),
SM = function(df, var) paste("SM =", df$SM)
)
results <- filter_data_for_plots(df, variables, limits_map)
df_filtered <- results$df_filtered
lm_lines <- filtered_results$lm_lines
# Iterate over each variable to create plot configurations
for (variable in variables) {
# Get y-limits for the variable
ylim_vals <- limits_map[[variable]]
# Extract precomputed linear model coefficients
lm_line <- list(
intercept = df[[paste0("lm_intercept_", variable)]],
slope = df[[paste0("lm_slope_", variable)]]
)
# Filter the data based on y-limits and missing values
y_var_sym <- sym(variable)
x_var_sym <- sym("conc_num_factor")
# Identify missing data and out-of-range data
missing_data <- df %>% filter(is.na(!!x_var_sym) | is.na(!!y_var_sym))
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
# Combine missing data and out-of-range data
data_to_filter_out <- bind_rows(missing_data, out_of_range_data) %>% distinct()
# Filtered data for plotting
filtered_data <- df %>% anti_join(data_to_filter_out, by = names(df))
# Collect the filtered data and out-of-range data
filtered_data_list[[variable]] <- filtered_data
out_of_range_data_list[[variable]] <- data_to_filter_out
# Calculate x and y positions for annotations based on filtered data
x_levels <- levels(filtered_data$conc_num_factor)
x_pos <- mean(seq_along(x_levels)) # Midpoint of x-axis
y_min <- min(ylim_vals)
y_max <- max(ylim_vals)
x_levels <- levels(df_filtered$conc_num_factor)
num_levels <- length(x_levels)
x_pos <- (1 + num_levels) / 2 # Midpoint of x-axis positions
y_range <- limits_map[[variable]]
y_min <- min(y_range)
y_max <- max(y_range)
y_span <- y_max - y_min
# Adjust y positions as fractions of y-span
@@ -650,7 +625,7 @@ generate_interaction_plot_configs <- function(df, variables) {
y_pos <- annotation_positions[[annotation_name]]
label_func <- annotation_labels[[annotation_name]]
if (!is.null(label_func)) {
label <- label_func(df, variable)
label <- label_func(df_filtered, variable)
list(x = x_pos, y = y_pos, label = label)
} else {
message(paste("Warning: No annotation function found for", annotation_name))
@@ -663,48 +638,40 @@ generate_interaction_plot_configs <- function(df, variables) {
# Create scatter plot config
configs[[length(configs) + 1]] <- list(
df = filtered_data,
df = df_filtered,
x_var = "conc_num_factor",
y_var = variable,
plot_type = "scatter",
title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]),
ylim_vals = ylim_vals,
title = sprintf("%s %s", df_filtered$OrfRep[1], df_filteredGene[1]),
ylim_vals = y_range,
annotations = annotations,
lm_line = lm_line,
lm_line = lm_lines[[variable]],
error_bar = TRUE,
x_breaks = levels(filtered_data$conc_num_factor),
x_labels = levels(filtered_data$conc_num_factor),
x_breaks = levels(df_filtered$conc_num_factor),
x_labels = levels(df_filtered$conc_num_factor),
x_label = unique(df$Drug[1]),
position = "jitter",
coord_cartesian = ylim_vals # Use the actual y-limits
coord_cartesian = y_range # Use the actual y-limits
)
# Create box plot config
configs[[length(configs) + 1]] <- list(
df = filtered_data,
df = df_filtered,
x_var = "conc_num_factor",
y_var = variable,
plot_type = "box",
title = sprintf("%s %s (Boxplot)", df$OrfRep[1], df$Gene[1]),
ylim_vals = ylim_vals,
title = sprintf("%s %s (Boxplot)", df_filtered$OrfRep[1], df_filtered$Gene[1]),
ylim_vals = y_range,
annotations = annotations,
error_bar = FALSE,
x_breaks = unique(filtered_data$conc_num_factor),
x_labels = unique(as.character(filtered_data$conc_num)),
x_label = unique(df$Drug[1]),
coord_cartesian = ylim_vals
x_breaks = levels(df_filtered$conc_num_factor),
x_labels = levels(df_filtered$conc_num_factor),
x_label = unique(df_filtered$Drug[1]),
coord_cartesian = y_range
)
}
# Combine the filtered data and out-of-range data into data frames
filtered_data_df <- bind_rows(filtered_data_list, .id = "variable")
out_of_range_data_df <- bind_rows(out_of_range_data_list, .id = "variable")
return(list(
configs = configs,
filtered_data = filtered_data_df,
out_of_range_data = out_of_range_data_df
))
return(configs)
}
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
@@ -822,6 +789,54 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
df %>% filter(if_all(all_of(vars_to_check), is.finite))
}
filter_data_for_plots <- function(df, variables, limits_map) {
# Initialize lists to store lm lines and filtered data
lm_lines <- list()
# Print out NA and out-of-range data separately
for (variable in variables) {
# Get y-limits for the variable
ylim_vals <- limits_map[[variable]]
# Extract precomputed linear model coefficients
lm_lines[[variable]] <- list(
intercept = df[[paste0("lm_intercept_", variable)]],
slope = df[[paste0("lm_slope_", variable)]]
)
# Convert variable name to symbol for dplyr
y_var_sym <- sym(variable)
# Identify missing data and print it
missing_data <- df %>% filter(is.na(!!y_var_sym))
if (nrow(missing_data) > 0) {
message("Missing data for variable ", variable, ":")
print(missing_data)
}
# Identify out-of-range data and print it
out_of_range_data <- df %>% filter(
!is.na(!!y_var_sym) &
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
)
if (nrow(out_of_range_data) > 0) {
message("Out-of-range data for variable ", variable, ":")
print(out_of_range_data)
}
}
# Perform all filtering at once for all variables
df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>%
filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}"))
# Return the filtered dataframe and lm lines
return(list(
df_filtered = df_filtered,
lm_lines = lm_lines
))
}
main <- function() {
lapply(names(args$experiments), function(exp_name) {
exp <- args$experiments[[exp_name]]
@@ -1151,22 +1166,11 @@ main <- function() {
# Create interaction plots
message("Generating reference interaction plots")
results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
if (nrow(results$out_of_range_data) > 0) {
message("Out-of-range data:")
print(results$out_of_range_data %>% select("OrfRep", "Gene", "num",
"conc_num", "conc_num_factor", config$x_var, config$y_var))
}
reference_plot_configs <- results$configs
reference_plot_configs <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
message("Generating deletion interaction plots")
results <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
if (nrow(results$out_of_range_data) > 0) {
message("Out-of-range data:")
print(results$out_of_range_data)
}
deletion_plot_configs <- results$configs
deletion_plot_configs <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
# Define conditions for enhancers and suppressors
@@ -1253,10 +1257,10 @@ main <- function() {
ungroup() %>%
rowwise() %>%
mutate(
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared,
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA,
Overlap = case_when(
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",

View File

@@ -614,41 +614,6 @@ interactive_header() {
module install_dependencies
# @description This module will automatically install the dependencies for running QHTCP.
#
# If you wish to install them manually, you can use the following information to do so:
#
# #### System dependencies
#
# * R
# * Perl
# * Java
# * MATLAB
#
# #### MacOS
#
# * `export HOMEBREW_BREW_GIT_REMOTE=https://github.com/Homebrew/brew`
# * `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"`
# * `cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder`
# * `brew install graphiz gd pdftk-java pandoc shdoc nano rsync coreutils`
#
# #### Linux DEB
#
# * `apt install graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync coreutils libcurl-dev openssl-dev`
#
# #### Linux RPM
#
# * `dnf install graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano rsync coreutils libcurl-devel openssl-devel`
#
# #### Perl
#
# * `cpan -I -i File::Map ExtUtils::PkgConfig GD GO::TermFinder`
#
# #### R
#
# * `install.packages(c('BiocManager', 'ontologyIndex', 'ggrepel', 'tidyverse', 'sos', 'openxlsx', 'ggplot2', 'plyr', 'extrafont', 'gridExtra', 'gplots', 'stringr', 'plotly', 'ggthemes', 'pandoc', 'rmarkdown', 'plotly', 'htmlwidgets'), dep=TRUE)`
# * `BiocManager::install('UCSC.utils')`
# * `BiocManager::install('org.Sc.sgd.db')`
#
#
install_dependencies() {
debug "Running: ${FUNCNAME[0]} $*"
@@ -669,8 +634,8 @@ install_dependencies() {
ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder)
depends_r=(
BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc
rmarkdown plotly htmlwidgets gplots gdata Hmisc)
dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
rmarkdown htmlwidgets gdata Hmisc)
depends_bioc=(UCSC.utils org.Sc.sgd.db)
[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars