Break out plot filtering
This commit is contained in:
@@ -1,11 +1,12 @@
|
|||||||
suppressMessages({
|
suppressMessages({
|
||||||
library(ggplot2)
|
library("ggplot2")
|
||||||
library(plotly)
|
library("plotly")
|
||||||
library(htmlwidgets)
|
library("htmlwidgets")
|
||||||
library(dplyr)
|
library("dplyr")
|
||||||
library(ggthemes)
|
library("rlang")
|
||||||
library(data.table)
|
library("ggthemes")
|
||||||
library(unix)
|
library("data.table")
|
||||||
|
library("unix")
|
||||||
})
|
})
|
||||||
|
|
||||||
options(warn = 2)
|
options(warn = 2)
|
||||||
@@ -568,13 +569,9 @@ generate_box_plot <- function(plot, config) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
generate_interaction_plot_configs <- function(df, variables) {
|
generate_interaction_plot_configs <- function(df, variables) {
|
||||||
|
|
||||||
configs <- list()
|
configs <- list()
|
||||||
|
|
||||||
# Data frames to collect filtered data and out-of-range data
|
|
||||||
filtered_data_list <- list()
|
|
||||||
out_of_range_data_list <- list()
|
|
||||||
|
|
||||||
# Define common y-limits for each variable
|
|
||||||
limits_map <- list(
|
limits_map <- list(
|
||||||
L = c(-65, 65),
|
L = c(-65, 65),
|
||||||
K = c(-65, 65),
|
K = c(-65, 65),
|
||||||
@@ -597,43 +594,21 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
SM = function(df, var) paste("SM =", df$SM)
|
SM = function(df, var) paste("SM =", df$SM)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
results <- filter_data_for_plots(df, variables, limits_map)
|
||||||
|
df_filtered <- results$df_filtered
|
||||||
|
lm_lines <- filtered_results$lm_lines
|
||||||
|
|
||||||
|
# Iterate over each variable to create plot configurations
|
||||||
for (variable in variables) {
|
for (variable in variables) {
|
||||||
# Get y-limits for the variable
|
|
||||||
ylim_vals <- limits_map[[variable]]
|
|
||||||
|
|
||||||
# Extract precomputed linear model coefficients
|
|
||||||
lm_line <- list(
|
|
||||||
intercept = df[[paste0("lm_intercept_", variable)]],
|
|
||||||
slope = df[[paste0("lm_slope_", variable)]]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Filter the data based on y-limits and missing values
|
|
||||||
y_var_sym <- sym(variable)
|
|
||||||
x_var_sym <- sym("conc_num_factor")
|
|
||||||
|
|
||||||
# Identify missing data and out-of-range data
|
|
||||||
missing_data <- df %>% filter(is.na(!!x_var_sym) | is.na(!!y_var_sym))
|
|
||||||
out_of_range_data <- df %>% filter(
|
|
||||||
!is.na(!!y_var_sym) &
|
|
||||||
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
|
|
||||||
)
|
|
||||||
|
|
||||||
# Combine missing data and out-of-range data
|
|
||||||
data_to_filter_out <- bind_rows(missing_data, out_of_range_data) %>% distinct()
|
|
||||||
|
|
||||||
# Filtered data for plotting
|
|
||||||
filtered_data <- df %>% anti_join(data_to_filter_out, by = names(df))
|
|
||||||
|
|
||||||
# Collect the filtered data and out-of-range data
|
|
||||||
filtered_data_list[[variable]] <- filtered_data
|
|
||||||
out_of_range_data_list[[variable]] <- data_to_filter_out
|
|
||||||
|
|
||||||
# Calculate x and y positions for annotations based on filtered data
|
# Calculate x and y positions for annotations based on filtered data
|
||||||
x_levels <- levels(filtered_data$conc_num_factor)
|
x_levels <- levels(df_filtered$conc_num_factor)
|
||||||
x_pos <- mean(seq_along(x_levels)) # Midpoint of x-axis
|
num_levels <- length(x_levels)
|
||||||
|
x_pos <- (1 + num_levels) / 2 # Midpoint of x-axis positions
|
||||||
|
|
||||||
y_min <- min(ylim_vals)
|
y_range <- limits_map[[variable]]
|
||||||
y_max <- max(ylim_vals)
|
y_min <- min(y_range)
|
||||||
|
y_max <- max(y_range)
|
||||||
y_span <- y_max - y_min
|
y_span <- y_max - y_min
|
||||||
|
|
||||||
# Adjust y positions as fractions of y-span
|
# Adjust y positions as fractions of y-span
|
||||||
@@ -650,7 +625,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
y_pos <- annotation_positions[[annotation_name]]
|
y_pos <- annotation_positions[[annotation_name]]
|
||||||
label_func <- annotation_labels[[annotation_name]]
|
label_func <- annotation_labels[[annotation_name]]
|
||||||
if (!is.null(label_func)) {
|
if (!is.null(label_func)) {
|
||||||
label <- label_func(df, variable)
|
label <- label_func(df_filtered, variable)
|
||||||
list(x = x_pos, y = y_pos, label = label)
|
list(x = x_pos, y = y_pos, label = label)
|
||||||
} else {
|
} else {
|
||||||
message(paste("Warning: No annotation function found for", annotation_name))
|
message(paste("Warning: No annotation function found for", annotation_name))
|
||||||
@@ -663,48 +638,40 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
|
|
||||||
# Create scatter plot config
|
# Create scatter plot config
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = filtered_data,
|
df = df_filtered,
|
||||||
x_var = "conc_num_factor",
|
x_var = "conc_num_factor",
|
||||||
y_var = variable,
|
y_var = variable,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]),
|
title = sprintf("%s %s", df_filtered$OrfRep[1], df_filteredGene[1]),
|
||||||
ylim_vals = ylim_vals,
|
ylim_vals = y_range,
|
||||||
annotations = annotations,
|
annotations = annotations,
|
||||||
lm_line = lm_line,
|
lm_line = lm_lines[[variable]],
|
||||||
error_bar = TRUE,
|
error_bar = TRUE,
|
||||||
x_breaks = levels(filtered_data$conc_num_factor),
|
x_breaks = levels(df_filtered$conc_num_factor),
|
||||||
x_labels = levels(filtered_data$conc_num_factor),
|
x_labels = levels(df_filtered$conc_num_factor),
|
||||||
x_label = unique(df$Drug[1]),
|
x_label = unique(df$Drug[1]),
|
||||||
position = "jitter",
|
position = "jitter",
|
||||||
coord_cartesian = ylim_vals # Use the actual y-limits
|
coord_cartesian = y_range # Use the actual y-limits
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create box plot config
|
# Create box plot config
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = filtered_data,
|
df = df_filtered,
|
||||||
x_var = "conc_num_factor",
|
x_var = "conc_num_factor",
|
||||||
y_var = variable,
|
y_var = variable,
|
||||||
plot_type = "box",
|
plot_type = "box",
|
||||||
title = sprintf("%s %s (Boxplot)", df$OrfRep[1], df$Gene[1]),
|
title = sprintf("%s %s (Boxplot)", df_filtered$OrfRep[1], df_filtered$Gene[1]),
|
||||||
ylim_vals = ylim_vals,
|
ylim_vals = y_range,
|
||||||
annotations = annotations,
|
annotations = annotations,
|
||||||
error_bar = FALSE,
|
error_bar = FALSE,
|
||||||
x_breaks = unique(filtered_data$conc_num_factor),
|
x_breaks = levels(df_filtered$conc_num_factor),
|
||||||
x_labels = unique(as.character(filtered_data$conc_num)),
|
x_labels = levels(df_filtered$conc_num_factor),
|
||||||
x_label = unique(df$Drug[1]),
|
x_label = unique(df_filtered$Drug[1]),
|
||||||
coord_cartesian = ylim_vals
|
coord_cartesian = y_range
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Combine the filtered data and out-of-range data into data frames
|
return(configs)
|
||||||
filtered_data_df <- bind_rows(filtered_data_list, .id = "variable")
|
|
||||||
out_of_range_data_df <- bind_rows(out_of_range_data_list, .id = "variable")
|
|
||||||
|
|
||||||
return(list(
|
|
||||||
configs = configs,
|
|
||||||
filtered_data = filtered_data_df,
|
|
||||||
out_of_range_data = out_of_range_data_df
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
|
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
|
||||||
@@ -822,6 +789,54 @@ filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
|
|||||||
df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
filter_data_for_plots <- function(df, variables, limits_map) {
|
||||||
|
|
||||||
|
# Initialize lists to store lm lines and filtered data
|
||||||
|
lm_lines <- list()
|
||||||
|
|
||||||
|
# Print out NA and out-of-range data separately
|
||||||
|
for (variable in variables) {
|
||||||
|
# Get y-limits for the variable
|
||||||
|
ylim_vals <- limits_map[[variable]]
|
||||||
|
|
||||||
|
# Extract precomputed linear model coefficients
|
||||||
|
lm_lines[[variable]] <- list(
|
||||||
|
intercept = df[[paste0("lm_intercept_", variable)]],
|
||||||
|
slope = df[[paste0("lm_slope_", variable)]]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert variable name to symbol for dplyr
|
||||||
|
y_var_sym <- sym(variable)
|
||||||
|
|
||||||
|
# Identify missing data and print it
|
||||||
|
missing_data <- df %>% filter(is.na(!!y_var_sym))
|
||||||
|
if (nrow(missing_data) > 0) {
|
||||||
|
message("Missing data for variable ", variable, ":")
|
||||||
|
print(missing_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Identify out-of-range data and print it
|
||||||
|
out_of_range_data <- df %>% filter(
|
||||||
|
!is.na(!!y_var_sym) &
|
||||||
|
(!!y_var_sym < min(ylim_vals, na.rm = TRUE) | !!y_var_sym > max(ylim_vals, na.rm = TRUE))
|
||||||
|
)
|
||||||
|
if (nrow(out_of_range_data) > 0) {
|
||||||
|
message("Out-of-range data for variable ", variable, ":")
|
||||||
|
print(out_of_range_data)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform all filtering at once for all variables
|
||||||
|
df_filtered <- df %>% filter(across(all_of(variables), ~ !is.na(.))) %>%
|
||||||
|
filter(across(all_of(variables), ~ between(., limits_map[[cur_column()]][1], limits_map[[cur_column()]][2]), .names = "filter_{col}"))
|
||||||
|
|
||||||
|
# Return the filtered dataframe and lm lines
|
||||||
|
return(list(
|
||||||
|
df_filtered = df_filtered,
|
||||||
|
lm_lines = lm_lines
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
exp <- args$experiments[[exp_name]]
|
exp <- args$experiments[[exp_name]]
|
||||||
@@ -1151,22 +1166,11 @@ main <- function() {
|
|||||||
|
|
||||||
# Create interaction plots
|
# Create interaction plots
|
||||||
message("Generating reference interaction plots")
|
message("Generating reference interaction plots")
|
||||||
results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
|
reference_plot_configs <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
|
||||||
if (nrow(results$out_of_range_data) > 0) {
|
|
||||||
message("Out-of-range data:")
|
|
||||||
print(results$out_of_range_data %>% select("OrfRep", "Gene", "num",
|
|
||||||
"conc_num", "conc_num_factor", config$x_var, config$y_var))
|
|
||||||
}
|
|
||||||
reference_plot_configs <- results$configs
|
|
||||||
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
||||||
|
|
||||||
message("Generating deletion interaction plots")
|
message("Generating deletion interaction plots")
|
||||||
results <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
|
deletion_plot_configs <- generate_interaction_plot_configs(zscores_interactions_joined, interaction_vars)
|
||||||
if (nrow(results$out_of_range_data) > 0) {
|
|
||||||
message("Out-of-range data:")
|
|
||||||
print(results$out_of_range_data)
|
|
||||||
}
|
|
||||||
deletion_plot_configs <- results$configs
|
|
||||||
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
||||||
|
|
||||||
# Define conditions for enhancers and suppressors
|
# Define conditions for enhancers and suppressors
|
||||||
@@ -1253,10 +1257,10 @@ main <- function() {
|
|||||||
ungroup() %>%
|
ungroup() %>%
|
||||||
rowwise() %>%
|
rowwise() %>%
|
||||||
mutate(
|
mutate(
|
||||||
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
|
||||||
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
|
||||||
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
|
||||||
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared,
|
lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA,
|
||||||
|
|
||||||
Overlap = case_when(
|
Overlap = case_when(
|
||||||
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
|
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
|
||||||
|
|||||||
@@ -614,41 +614,6 @@ interactive_header() {
|
|||||||
module install_dependencies
|
module install_dependencies
|
||||||
# @description This module will automatically install the dependencies for running QHTCP.
|
# @description This module will automatically install the dependencies for running QHTCP.
|
||||||
#
|
#
|
||||||
# If you wish to install them manually, you can use the following information to do so:
|
|
||||||
#
|
|
||||||
# #### System dependencies
|
|
||||||
#
|
|
||||||
# * R
|
|
||||||
# * Perl
|
|
||||||
# * Java
|
|
||||||
# * MATLAB
|
|
||||||
#
|
|
||||||
# #### MacOS
|
|
||||||
#
|
|
||||||
# * `export HOMEBREW_BREW_GIT_REMOTE=https://github.com/Homebrew/brew`
|
|
||||||
# * `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"`
|
|
||||||
# * `cpan File::Map ExtUtils::PkgConfig GD GO::TermFinder`
|
|
||||||
# * `brew install graphiz gd pdftk-java pandoc shdoc nano rsync coreutils`
|
|
||||||
#
|
|
||||||
# #### Linux DEB
|
|
||||||
#
|
|
||||||
# * `apt install graphviz pandoc pdftk-java libgd-dev perl shdoc nano rsync coreutils libcurl-dev openssl-dev`
|
|
||||||
#
|
|
||||||
# #### Linux RPM
|
|
||||||
#
|
|
||||||
# * `dnf install graphviz pandoc pdftk-java gd-devel perl-CPAN shdoc nano rsync coreutils libcurl-devel openssl-devel`
|
|
||||||
#
|
|
||||||
# #### Perl
|
|
||||||
#
|
|
||||||
# * `cpan -I -i File::Map ExtUtils::PkgConfig GD GO::TermFinder`
|
|
||||||
#
|
|
||||||
# #### R
|
|
||||||
#
|
|
||||||
# * `install.packages(c('BiocManager', 'ontologyIndex', 'ggrepel', 'tidyverse', 'sos', 'openxlsx', 'ggplot2', 'plyr', 'extrafont', 'gridExtra', 'gplots', 'stringr', 'plotly', 'ggthemes', 'pandoc', 'rmarkdown', 'plotly', 'htmlwidgets'), dep=TRUE)`
|
|
||||||
# * `BiocManager::install('UCSC.utils')`
|
|
||||||
# * `BiocManager::install('org.Sc.sgd.db')`
|
|
||||||
#
|
|
||||||
#
|
|
||||||
install_dependencies() {
|
install_dependencies() {
|
||||||
debug "Running: ${FUNCNAME[0]} $*"
|
debug "Running: ${FUNCNAME[0]} $*"
|
||||||
|
|
||||||
@@ -669,8 +634,8 @@ install_dependencies() {
|
|||||||
ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder)
|
ExtUtils::PkgConfig IPC::Run Module::Build::Tiny GD GO::TermFinder)
|
||||||
depends_r=(
|
depends_r=(
|
||||||
BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
|
BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
|
||||||
plyr extrafont gridExtra gplots stringr plotly ggthemes pandoc
|
dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
|
||||||
rmarkdown plotly htmlwidgets gplots gdata Hmisc)
|
rmarkdown htmlwidgets gdata Hmisc)
|
||||||
depends_bioc=(UCSC.utils org.Sc.sgd.db)
|
depends_bioc=(UCSC.utils org.Sc.sgd.db)
|
||||||
|
|
||||||
[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars
|
[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars
|
||||||
|
|||||||
Reference in New Issue
Block a user