|
@@ -567,37 +567,6 @@ generate_box_plot <- function(plot, config) {
|
|
return(plot)
|
|
return(plot)
|
|
}
|
|
}
|
|
|
|
|
|
-# Adjust missing values and calculate ranks
|
|
|
|
-adjust_missing_and_rank <- function(df, variables) {
|
|
|
|
-
|
|
|
|
- # Loop over each variable
|
|
|
|
- for (var in variables) {
|
|
|
|
- # Construct column names
|
|
|
|
- avg_zscore_col <- paste0("Avg_Zscore_", var)
|
|
|
|
- z_lm_col <- paste0("Z_lm_", var)
|
|
|
|
- rank_col <- paste0("Rank_", var)
|
|
|
|
- rank_lm_col <- paste0("Rank_lm_", var)
|
|
|
|
-
|
|
|
|
- # Check if the columns exist in the data frame
|
|
|
|
- if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
|
|
|
|
- # Adjust missing values by replacing NA with 0.001
|
|
|
|
- df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
|
|
|
|
- df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
|
|
|
|
-
|
|
|
|
- # Compute ranks and create new columns
|
|
|
|
- df[[rank_col]] <- rank(df[[avg_zscore_col]])
|
|
|
|
- df[[rank_lm_col]] <- rank(df[[z_lm_col]])
|
|
|
|
- } else {
|
|
|
|
- warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in data frame"))
|
|
|
|
- }
|
|
|
|
- # Print some debugging output
|
|
|
|
- print(df %>% select("OrfRep", "Gene", "conc_num", "conc_num_factor",
|
|
|
|
- avg_zscore_col, z_lm_col, rank_col, rank_lm_col))
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return(df)
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
generate_interaction_plot_configs <- function(df, variables) {
|
|
generate_interaction_plot_configs <- function(df, variables) {
|
|
configs <- list()
|
|
configs <- list()
|
|
|
|
|
|
@@ -738,46 +707,90 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
))
|
|
))
|
|
}
|
|
}
|
|
|
|
|
|
-generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
|
|
|
|
|
|
+generate_rank_plot_configs <- function(df, interaction_vars, rank_vars, is_lm = FALSE) {
|
|
|
|
+ # Adjust missing values and compute ranks for each interaction variable
|
|
|
|
+ for (var in interaction_vars) {
|
|
|
|
+ avg_zscore_col <- paste0("Avg_Zscore_", var)
|
|
|
|
+ z_lm_col <- paste0("Z_lm_", var)
|
|
|
|
+ rank_col <- paste0("Rank_", var)
|
|
|
|
+ rank_lm_col <- paste0("Rank_lm_", var)
|
|
|
|
+
|
|
|
|
+ if (all(c(avg_zscore_col, z_lm_col) %in% names(df))) {
|
|
|
|
+ # Replace NA with 0.001 for interaction variables
|
|
|
|
+ df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
|
|
|
|
+ df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
|
|
|
|
+
|
|
|
|
+ # Compute ranks for interaction variables
|
|
|
|
+ df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
|
|
|
|
+ df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
|
|
|
|
+ } else {
|
|
|
|
+ warning(paste("Columns", avg_zscore_col, "or", z_lm_col, "not found in the data frame"))
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # Initialize list to store plot configurations
|
|
configs <- list()
|
|
configs <- list()
|
|
|
|
|
|
- plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
|
|
|
|
-
|
|
|
|
- # Single config with all sd bands
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(plot_title_prefix, var, "Rank Plot"),
|
|
|
|
- sd_band = c(1, 2, 3), # Pass all sd bands at once
|
|
|
|
- enhancer_label = list(
|
|
|
|
- x = nrow(df) / 2, y = 10,
|
|
|
|
- label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= 1, ])) # Example for the first SD band
|
|
|
|
- ),
|
|
|
|
- suppressor_label = list(
|
|
|
|
- x = nrow(df) / 2, y = -10,
|
|
|
|
- label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -1, ]))
|
|
|
|
- ),
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1
|
|
|
|
- )
|
|
|
|
|
|
+ # Generate plot configurations for rank variables
|
|
|
|
+ for (var in rank_vars) {
|
|
|
|
+ if (is_lm) {
|
|
|
|
+ rank_var <- paste0("Rank_lm_", var)
|
|
|
|
+ zscore_var <- paste0("Z_lm_", var)
|
|
|
|
+ plot_title_prefix <- "Interaction Z score vs. Rank for"
|
|
|
|
+ } else {
|
|
|
|
+ rank_var <- paste0("Rank_", var)
|
|
|
|
+ zscore_var <- paste0("Avg_Zscore_", var)
|
|
|
|
+ plot_title_prefix <- "Average Z score vs. Rank for"
|
|
|
|
+ }
|
|
|
|
|
|
- # Non-annotated version
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
|
|
|
|
- sd_band = c(1, 2, 3),
|
|
|
|
- enhancer_label = NULL,
|
|
|
|
- suppressor_label = NULL,
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1
|
|
|
|
- )
|
|
|
|
|
|
+ enhancers_count <- nrow(df[df[[zscore_var]] >= 1, ])
|
|
|
|
+ suppressors_count <- nrow(df[df[[zscore_var]] <= -1, ])
|
|
|
|
|
|
- return(configs)
|
|
|
|
|
|
+ # Create Annotated Plot Configuration for rank variables
|
|
|
|
+ config_annotated <- list(
|
|
|
|
+ df = df,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(plot_title_prefix, var, "Rank Plot"),
|
|
|
|
+ sd_band = c(1, 2, 3),
|
|
|
|
+ enhancer_label = list(
|
|
|
|
+ x = nrow(df) / 2,
|
|
|
|
+ y = 10,
|
|
|
|
+ label = paste("Deletion Enhancers =", enhancers_count)
|
|
|
|
+ ),
|
|
|
|
+ suppressor_label = list(
|
|
|
|
+ x = nrow(df) / 2,
|
|
|
|
+ y = -10,
|
|
|
|
+ label = paste("Deletion Suppressors =", suppressors_count)
|
|
|
|
+ ),
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Create Non-Annotated Plot Configuration for rank variables
|
|
|
|
+ config_no_annotated <- list(
|
|
|
|
+ df = df,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(plot_title_prefix, var, "Rank Plot No Annotations"),
|
|
|
|
+ sd_band = c(1, 2, 3),
|
|
|
|
+ enhancer_label = NULL,
|
|
|
|
+ suppressor_label = NULL,
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Append configurations to the list
|
|
|
|
+ configs[[length(configs) + 1]] <- config_annotated
|
|
|
|
+ configs[[length(configs) + 1]] <- config_no_annotated
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return(list(
|
|
|
|
+ adjusted_df = df,
|
|
|
|
+ plot_configs = configs
|
|
|
|
+ ))
|
|
}
|
|
}
|
|
|
|
|
|
generate_correlation_plot_configs <- function(df, variables) {
|
|
generate_correlation_plot_configs <- function(df, variables) {
|
|
@@ -1214,24 +1227,41 @@ main <- function() {
|
|
write.csv(suppressors_lm_K,
|
|
write.csv(suppressors_lm_K,
|
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
|
|
|
|
|
- # TODO needs explanation
|
|
|
|
- message("Adjusting and ranking interactions scores")
|
|
|
|
- zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions, variables = interaction_vars)
|
|
|
|
-
|
|
|
|
message("Generating rank plots")
|
|
message("Generating rank plots")
|
|
- rank_plot_configs <- c(
|
|
|
|
- generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
|
|
|
|
- generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
|
|
|
|
|
|
+ rank_vars <- c("L", "K")
|
|
|
|
+
|
|
|
|
+ # Generate rank plot configurations and adjust the dataframe
|
|
|
|
+ rank_plot_results <- generate_rank_plot_configs(
|
|
|
|
+ df = zscores_interactions,
|
|
|
|
+ interaction_vars = interaction_vars,
|
|
|
|
+ rank_vars = rank_vars,
|
|
|
|
+ is_lm = FALSE
|
|
|
|
+ )
|
|
|
|
+ zscores_interactions_adjusted <- rank_plot_results$adjusted_df
|
|
|
|
+
|
|
|
|
+ # Generate and save standard rank plots
|
|
|
|
+ generate_and_save_plots(
|
|
|
|
+ output_dir = out_dir,
|
|
|
|
+ file_name = "RankPlots",
|
|
|
|
+ plot_configs = rank_plot_results$plot_configs,
|
|
|
|
+ grid_layout = list(ncol = 3, nrow = 2)
|
|
)
|
|
)
|
|
- generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
|
|
|
|
- plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
|
|
|
|
|
|
- rank_lm_plot_config <- c(
|
|
|
|
- generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
|
|
|
|
- generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
|
|
|
|
|
|
+ # Generate rank plot configurations for lm variables
|
|
|
|
+ rank_lm_plot_results <- generate_rank_plot_configs(
|
|
|
|
+ df = zscores_interactions_adjusted,
|
|
|
|
+ interaction_vars = interaction_vars,
|
|
|
|
+ rank_vars = rank_vars,
|
|
|
|
+ is_lm = TRUE
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Generate and save lm rank plots
|
|
|
|
+ generate_and_save_plots(
|
|
|
|
+ output_dir = out_dir,
|
|
|
|
+ file_name = "RankPlots_lm",
|
|
|
|
+ plot_configs = rank_lm_plot_results$plot_configs,
|
|
|
|
+ grid_layout = list(ncol = 3, nrow = 2)
|
|
)
|
|
)
|
|
- generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
|
|
|
- plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
|
|
|
|
|
|
|
# Formerly X_NArm
|
|
# Formerly X_NArm
|
|
zscores_interactions_filtered <- zscores_interactions %>%
|
|
zscores_interactions_filtered <- zscores_interactions %>%
|