diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index d6c7a65b..74c1ae58 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -238,7 +238,7 @@ calculate_l_2sd_of_k <- function(df, df_stats_by_k) { df_within_2sd_k <- df_within_2sd_k %>% select(names(df)) df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df)) - list(df_within_2sd_k = df_within_2sd_k, df_outside_2sd_k = df_outside_2sd_k) + list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k) } # Ensure all plots are saved and printed to PDF @@ -289,178 +289,88 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) return(df_strains) } + calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, - df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) { + df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables) { - # Initialize the dataframe - interaction_scores <- df %>% - distinct(OrfRep) %>% - mutate( - Gene = NA, - raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, - r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, - raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, - r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, - raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, - r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, - raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, - r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, - NG = NA, DB = NA, SM = NA - ) - - # The list to hold all of the combined interactions - interaction_scores_list <- list() - - # TODO is this necessary? - # Initialize the dataframe - interaction_scores <- interaction_scores %>% - mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA - - for (i in seq_len(nrow(interaction_scores))) { - gene_sel <- interaction_scores$OrfRep[i] - df_gene_sel <- df %>% filter(OrfRep == gene_sel) - - df_stats_interaction <- df_gene_sel %>% - group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% - summarise( - N = n(), - across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE), - NG = sum(NG, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) %>% - ungroup() - - shifts <- df_stats_interaction %>% - mutate( - raw_shift_l = L_mean - background_means$L, - z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1], - raw_shift_k = K_mean - background_means$K, - z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1], - raw_shift_r = r_mean - background_means$r, - z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1], - raw_shift_auc = AUC_mean - background_means$AUC, - z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1] - ) - - linear_models <- shifts %>% - summarise( - lm_l = list(lm(L_mean ~ conc_num_factor)), - lm_k = list(lm(K_mean ~ conc_num_factor)), - lm_r = list(lm(r_mean ~ conc_num_factor)), - lm_auc = list(lm(AUC_mean ~ conc_num_factor)) - ) - - z_scores <- shifts %>% - mutate( - exp_l = background_means$L + raw_shift_l, - delta_l = L_mean - exp_l, - exp_k = background_means$K + raw_shift_k, - delta_k = K_mean - exp_k, - exp_r = background_means$r + raw_shift_r, - delta_r = r_mean - exp_r, - exp_auc = background_means$AUC + raw_shift_auc, - delta_auc = AUC_mean - exp_auc, - zscore_l = delta_l / df_stats_by_l$L_sd, - zscore_k = delta_k / df_stats_by_k$K_sd, - zscore_r = delta_r / df_stats_by_r$r_sd, - zscore_auc = delta_auc / df_stats_by_auc$AUC_sd, - sum_z_score_l = sum(zscore_l, na.rm = TRUE), - avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)), - sum_z_score_k = sum(zscore_k, na.rm = TRUE), - avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)), - sum_z_score_r = sum(zscore_r, na.rm = TRUE), - avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)), - sum_z_score_auc = sum(zscore_auc, na.rm = TRUE), - avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE)) - ) - - # Update interaction_scores with calculated values - interaction_scores[i, ] <- interaction_scores[i, ] %>% - mutate( - Raw_Shift_L = shifts$raw_shift_l[1], - Z_Shift_L = shifts$z_shift_l[1], - lm_Score_L = max_conc * coef(linear_models$lm_l)[2] + coef(linear_models$lm_l)[1], - R_Squared_L = summary(linear_models$lm_l)$r.squared, - Sum_Z_Score_L = z_scores$sum_z_score_l[1], - Avg_Zscore_L = z_scores$avg_zscore_l[1], - Raw_Shift_K = shifts$raw_shift_k[1], - Z_Shift_K = shifts$z_shift_k[1], - lm_Score_K = max_conc * coef(linear_models$lm_k)[2] + coef(linear_models$lm_k)[1], - R_Squared_K = summary(linear_models$lm_k)$r.squared, - Sum_Z_Score_K = z_scores$sum_z_score_k[1], - Avg_Zscore_K = z_scores$avg_zscore_k[1], - Raw_Shift_r = shifts$raw_shift_r[1], - Z_Shift_r = shifts$z_shift_r[1], - lm_Score_r = max_conc * coef(linear_models$lm_r)[2] + coef(linear_models$lm_r)[1], - R_Squared_r = summary(linear_models$lm_r)$r.squared, - Sum_Z_Score_r = z_scores$sum_z_score_r[1], - Avg_Zscore_r = z_scores$avg_zscore_r[1], - Raw_Shift_AUC = shifts$raw_shift_auc[1], - Z_Shift_AUC = shifts$z_shift_auc[1], - lm_Score_AUC = max_conc * coef(linear_models$lm_auc)[2] + coef(linear_models$lm_auc)[1], - R_Squared_AUC = summary(linear_models$lm_auc)$r.squared, - Sum_Z_Score_AUC = z_scores$sum_z_score_auc[1], - Avg_Zscore_AUC = z_scores$avg_zscore_auc[1], - NG = sum(df_stats_interaction$NG, na.rm = TRUE), - DB = sum(df_stats_interaction$DB, na.rm = TRUE), - SM = sum(df_stats_interaction$SM, na.rm = TRUE) - ) - - interaction_scores_list[[i]] <- interaction_scores - } - - return(interaction_scores_list) -} - - -filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) { - - # Arrange the interaction scores by Z_lm_L and NG - interaction_scores <- interaction_scores %>% - arrange(desc(Z_lm_L), desc(NG)) + # Calculate all necessary statistics and shifts in one step + df_stats_interaction_all <- df %>% + group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>% + summarise( + N = n(), + across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE), + NG = sum(NG, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE), + raw_shift_l = mean(L, na.rm = TRUE) - background_means$L, + raw_shift_k = mean(K, na.rm = TRUE) - background_means$K, + raw_shift_r = mean(r, na.rm = TRUE) - background_means$r, + raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC, + z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1], + z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1], + z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1], + z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1], + exp_l = background_means$L + raw_shift_l, + exp_k = background_means$K + raw_shift_k, + exp_r = background_means$r + raw_shift_r, + exp_auc = background_means$AUC + raw_shift_auc, + delta_l = mean(L, na.rm = TRUE) - exp_l, + delta_k = mean(K, na.rm = TRUE) - exp_k, + delta_r = mean(r, na.rm = TRUE) - exp_r, + delta_auc = mean(AUC, na.rm = TRUE) - exp_auc, + zscore_l = delta_l / df_stats_by_l$L_sd, + zscore_k = delta_k / df_stats_by_k$K_sd, + zscore_r = delta_r / df_stats_by_r$r_sd, + zscore_auc = delta_auc / df_stats_by_auc$AUC_sd, + sum_z_score_l = sum(zscore_l, na.rm = TRUE), + avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_k = sum(zscore_k, na.rm = TRUE), + avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_r = sum(zscore_r, na.rm = TRUE), + avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_auc = sum(zscore_auc, na.rm = TRUE), + avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE)) + ) %>% + ungroup() - filters <- list( - list(name = "_ZScores_Interaction.csv", - filter = interaction_scores), - list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv", - filter = filter(interaction_scores, Avg_Zscore_L >= 2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv", - filter = filter(interaction_scores, Avg_Zscore_K <= -2)), - list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv", - filter = filter(interaction_scores, Avg_Zscore_L <= -2)), - list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv", - filter = filter(interaction_scores, Avg_Zscore_K >= 2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv", - filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv", - filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)), - list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv", - filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)), - list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv", - filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)), - list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv", - filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)), - list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv", - filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv", - filter = filter(interaction_scores, Z_lm_L >= 2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv", - filter = filter(interaction_scores, Z_lm_K <= -2)), - list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv", - filter = filter(interaction_scores, Z_lm_L <= -2)), - list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv", - filter = filter(interaction_scores, Z_lm_K >= 2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv", - filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)), - list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv", - filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2)) - ) + # Calculate linear models and interaction scores + interaction_scores <- df_stats_interaction_all %>% + group_by(OrfRep, Gene, num) %>% + summarise( + lm_l = list(lm(delta_l ~ conc_num_factor)), + lm_k = list(lm(delta_k ~ conc_num_factor)), + lm_r = list(lm(delta_r ~ conc_num_factor)), + lm_auc = list(lm(delta_auc ~ conc_num_factor)), + lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1], + lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1], + lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1], + lm_Score_AUC = max_conc * coef(lm_auc[[1]])[2] + coef(lm_auc[[1]])[1], + R_Squared_L = summary(lm_l[[1]])$r.squared, + R_Squared_K = summary(lm_k[[1]])$r.squared, + R_Squared_r = summary(lm_r[[1]])$r.squared, + R_Squared_AUC = summary(lm_auc[[1]])$r.squared, + Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE), + Avg_Zscore_L = avg_zscore_l[1], + Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE), + Avg_Zscore_K = avg_zscore_k[1], + Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE), + Avg_Zscore_r = avg_zscore_r[1], + Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE), + Avg_Zscore_AUC = avg_zscore_auc[1], + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) + ) %>% + ungroup() + + # Ensure interaction_scores has only the specified columns + interaction_scores <- interaction_scores %>% + select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l, + raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k, + raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r, + raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc, + NG, DB, SM) - # Iterate over each filter and save the corresponding CSV file - for (item in filters) { - file_name <- paste0(prefix, item$name) - write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE) - } + return(list(zscores_calculations = df_stats_interaction_all, zscores_interactions = interaction_scores)) } @@ -512,11 +422,11 @@ main <- function() { write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) # Calculate the pre-background stats once - df_stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) - df_stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars)) - df_stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars)) - df_stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars)) - df_stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars)) + stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) + stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars)) + stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars)) + stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars)) + stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars)) # Process background strains background_strains <- c("YDL227C") @@ -535,11 +445,11 @@ main <- function() { filter(!is.na(L)) # Recalculate summary statistics for the background strain - df_stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) - df_stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars)) - df_stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars)) - df_stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars)) - df_stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars)) + stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) + stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars)) + stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars)) + stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars)) + stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars)) # Backup in case previous block explodes # Combine all summary statistics into one dataframe @@ -549,30 +459,30 @@ main <- function() { # left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor")) # Save the summary statistics for the background strain - write.csv(df_stats_background, + write.csv(stats_background, file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), row.names = FALSE) # Calculate L values within and outside 2SD of K - results_2sd <- calculate_l_2sd_of_k(df_background, df_stats_by_k_background) - df_within_2sd_k <- results_2sd$df_within_2sd_k - df_outside_2sd_k <- results_2sd$df_outside_2sd_k + results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background) + within_2sd_k <- results_2sd$within_2sd_k + outside_2sd_k <- results_2sd$outside_2sd_k # Calculate summary statistics for L within and outside 2SD of K - df_stats_by_l_within_2sd_k <- calculate_summary_stats(df_within_2sd_k, "L") - write.csv(df_stats_by_l_within_2sd_k, + l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L") + write.csv(l_within_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE) - df_stats_by_l_outside_2sd_k <- calculate_summary_stats(df_outside_2sd_k, "L") - write.csv(df_stats_by_l_outside_2sd_k, + l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L") + write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE) - generate_and_save_plots(df_outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") + generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") - background_means <- calculate_background_means(df_stats_by_l_background, - df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background) + background_means <- calculate_background_means(stats_by_l_background, + stats_by_k_background, stats_by_r_background, stats_by_auc_background) df_reference <- df_na %>% filter(OrfRep == strain) %>% @@ -582,52 +492,81 @@ main <- function() { filter(OrfRep != strain) %>% mutate(SM = 0) - df_reference_strains <- process_strains(df_reference, df_stats_by_l_within_2sd_k, strain, out_dir) - write.csv(df_reference_strains, - file = file.path(output_dir, paste0("Processed_reference_strains_", strain, ".csv")), - row.names = FALSE) + df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir) + df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir) - df_deletion_strains <- process_strains(df_deletion, df_stats_by_l_within_2sd_k, strain, out_dir) - write.csv(df_deletion_strains, - file = file.path(output_dir, paste0("Processed_deletion_strains_", strain, ".csv")), - row.names = FALSE) - - # TODO: we are mutating OrfRep to make it more unique, is this sensible? + # TODO: we are mutating OrfRep to make it more unique, is this a sensible approach? + # Should probably create a new column? # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score - df_reference_strains <- df_reference_strains %>% - mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) + # df_reference_strains <- df_reference_strains %>% + # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) # num_genes <- length(unique(df_reference_strains$OrfRep)) # print(num_genes) variables <- c("L", "K", "r", "AUC") - interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l, - df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) - interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l, - df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) - - write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) - write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) - - - - - - - - # write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) - - # Filter and save interaction scores for reference strains - filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF") - - # Filter and save interaction scores for deletion strains - filter_and_save_interaction_scores(interaction_scores_deletion, out_dir, "Deletion") + reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, + stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir) + # Process deletion strains + deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, + stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir) + + zscores_calculations_reference <- reference_results$zscores_calculations + zscores_interactions_reference <- reference_results$zscores_interactions + zscores_calculations <- deletion_results$zscores_calculations + zscores_interactions <- deletion_results$zscores_interactions + + write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) + write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE) + write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) + write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) + + # Define conditions for enhancers and suppressors + enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2 + suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2 + enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2 + suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2 + + # Subset data + enhancers_L <- zscores_interactions[enhancer_condition_L, ] + suppressors_L <- zscores_interactions[suppressor_condition_L, ] + enhancers_K <- zscores_interactions[enhancer_condition_K, ] + suppressors_K <- zscores_interactions[suppressor_condition_K, ] + + # Save enhancers and suppressors + write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) + write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) + write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) + write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) + + # Combine conditions for enhancers and suppressors + enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ] + enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ] + + # Save combined enhancers and suppressors + write.csv(enhancers_and_suppressors_L, + file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) + write.csv(enhancers_and_suppressors_K, + file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) + + # Handle linear model based enhancers and suppressors + enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ] + suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ] + enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ] + suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ] + + # Save linear model based enhancers and suppressors + write.csv(enhancers_lm_L, + file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) + write.csv(suppressors_lm_L, + file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE) + write.csv(enhancers_lm_K, + file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) + write.csv(suppressors_lm_K, + file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) # Generate plots for interaction scores - # plot_interaction_scores(interaction_scores_reference, out_dir) # Assuming plot_interaction_scores is your plotting function - - }) }) }