diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 259b5c19..db2b3d36 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -707,16 +707,16 @@ generate_interaction_plot_configs <- function(df, variables) { )) } -generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE) { +generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", "K"), is_lm = FALSE, adjust = FALSE) { # Adjust missing values and compute ranks for each interaction variable - for (var in interaction_vars) { - avg_zscore_col <- paste0("Avg_Zscore_", var) - z_lm_col <- paste0("Z_lm_", var) - rank_col <- paste0("Rank_", var) - rank_lm_col <- paste0("Rank_lm_", var) - - if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) { + if (adjust) { + for (var in interaction_vars) { + avg_zscore_col <- paste0("Avg_Zscore_", var) + z_lm_col <- paste0("Z_lm_", var) + rank_col <- paste0("Rank_", var) + rank_lm_col <- paste0("Rank_lm_", var) + # Replace NA with 0.001 for interaction variables df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]]) df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]]) @@ -724,8 +724,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", # Compute ranks for interaction variables df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep") df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep") - } else { - warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame")) + } } @@ -779,8 +778,7 @@ generate_rank_plot_configs <- function(df, interaction_vars, rank_vars = c("L", enhancer_label = NULL, suppressor_label = NULL, shape = 3, - size = 0.1, - position = "jitter" + size = 0.1 ) } } @@ -1226,18 +1224,12 @@ main <- function() { file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE) message("Generating rank plots") - # Generate rank plot configurations and adjust the dataframe - zscores_interactions_adjusted <- generate_rank_plot_configs( - df = zscores_interactions, - interaction_vars = interaction_vars, - is_lm = FALSE - )$adjusted_df - # Generate rank plots for L and K using standard ranks rank_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_adjusted, + df = zscores_interactions, interaction_vars = interaction_vars, - is_lm = FALSE + is_lm = FALSE, + adjust = TRUE )$plot_configs # Save the generated rank plots for L and K @@ -1246,9 +1238,10 @@ main <- function() { # Generate rank plots for L and K using linear model (`lm`) ranks rank_lm_plot_configs <- generate_rank_plot_configs( - df = zscores_interactions_adjusted, + df = zscores_interactions, interaction_vars = interaction_vars, - is_lm = TRUE + is_lm = TRUE, + adjust = TRUE )$plot_configs # Save the linear model based rank plots for L and K @@ -1256,23 +1249,20 @@ main <- function() { plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Filtering and regenerating rank plots") - # Formerly X_NArm + # Filter rows where either Z_lm_L or Avg_Zscore_L is not NA zscores_interactions_filtered <- zscores_interactions %>% group_by(across(all_of(orf_group_vars))) %>% - filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) - - # Final filtered correlation calculations and plots - lm_results <- zscores_interactions_filtered %>% - summarise( + filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>% + ungroup() + + # Final filtered correlation calculations and Overlap column + zscores_interactions_filtered <- zscores_interactions_filtered %>% + rowwise() %>% + mutate( lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA, lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA, lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA, - lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA - ) - - zscores_interactions_filtered <- zscores_interactions_filtered %>% - left_join(lm_results, by = orf_group_vars) %>% - mutate( + lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA, Overlap = case_when( Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both", @@ -1285,24 +1275,32 @@ main <- function() { ) %>% ungroup() - rank_plot_configs <- c( - generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"), - generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K") - ) - generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", - plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) + message("Generating filtered rank plots") + rank_plot_filtered_configs <- generate_rank_plot_configs( + df = zscores_interactions_filtered, + interaction_vars = interaction_vars, + is_lm = FALSE, + adjust = FALSE + )$plot_configs + generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_na_rm", + plot_configs = rank_plot_filtered_configs, + grid_layout = list(ncol = 3, nrow = 2)) - rank_lm_plot_configs <- c( - generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), - generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE) - ) - generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", - plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) + rank_plot_lm_filtered_configs <- generate_rank_plot_configs( + df = zscores_interactions_filtered, + interaction_vars = interaction_vars, + is_lm = TRUE, + adjust = FALSE + )$plot_configs + generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm_na_rm", + plot_configs = rank_plot_lm_filtered_configs, + grid_layout = list(ncol = 3, nrow = 2)) message("Generating correlation plots") correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, interaction_vars) generate_and_save_plots(output_dir = out_dir, file_name = "Avg_Zscore_vs_lm_NA_rm", - plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2)) + plot_configs = correlation_plot_configs, + grid_layout = list(ncol = 2, nrow = 2)) }) }) }