Separate adjustments from ranks

This commit is contained in:
2024-09-16 12:21:40 -04:00
parent 1dfb5d5084
commit 22236fef49

View File

@@ -709,23 +709,21 @@ generate_interaction_plot_configs <- function(df, variables) {
generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) {
# Adjust missing values and compute ranks for each interaction variable
if (adjust) {
for (var in interaction_vars) { for (var in interaction_vars) {
avg_zscore_col <- paste0("Avg_Zscore_", var) avg_zscore_col <- paste0("Avg_Zscore_", var)
z_lm_col <- paste0("Z_lm_", var) z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var) rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var) rank_lm_col <- paste0("Rank_lm_", var)
if (adjust) {
# Replace NA with 0.001 for interaction variables # Replace NA with 0.001 for interaction variables
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
}
# Compute ranks for interaction variables # Compute ranks for interaction variables
df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
}
} }
# Initialize list to store plot configurations # Initialize list to store plot configurations
@@ -783,10 +781,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L",
} }
} }
return(list( return(configs)
adjusted_df = df,
plot_configs = configs
))
} }
generate_correlation_plot_configs <- function(df, variables) { generate_correlation_plot_configs <- function(df, variables) {
@@ -1230,7 +1225,7 @@ main <- function() {
interaction_vars = interaction_vars, interaction_vars = interaction_vars,
is_lm = FALSE, is_lm = FALSE,
adjust = TRUE adjust = TRUE
)$plot_configs )
# Save the generated rank plots for L and K # Save the generated rank plots for L and K
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
@@ -1242,7 +1237,7 @@ main <- function() {
interaction_vars = interaction_vars, interaction_vars = interaction_vars,
is_lm = TRUE, is_lm = TRUE,
adjust = TRUE adjust = TRUE
)$plot_configs )
# Save the linear model based rank plots for L and K # Save the linear model based rank plots for L and K
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
@@ -1250,19 +1245,18 @@ main <- function() {
message("Filtering and regenerating rank plots") message("Filtering and regenerating rank plots")
# Filter rows where either Z_lm_L or Avg_Zscore_L is not NA # Filter rows where either Z_lm_L or Avg_Zscore_L is not NA
# Formerly X_NArm
zscores_interactions_filtered <- zscores_interactions %>% zscores_interactions_filtered <- zscores_interactions %>%
group_by(across(all_of(orf_group_vars))) %>% group_by(across(all_of(orf_group_vars))) %>%
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>% filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
ungroup() ungroup() %>%
# Final filtered correlation calculations and Overlap column
zscores_interactions_filtered <- zscores_interactions_filtered %>%
rowwise() %>% rowwise() %>%
mutate( mutate(
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA, lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA, lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA, lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA, lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA,
Overlap = case_when( Overlap = case_when(
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both", Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both",