Auto-commit: apps/r/calculate_interaction_zscores5.R

This commit is contained in:
2024-08-31 14:59:38 -04:00
parent 7b0b2c9bbd
commit b139956df5

View File

@@ -238,7 +238,7 @@ calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
df_within_2sd_k <- df_within_2sd_k %>% select(names(df)) df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df)) df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
list(df_within_2sd_k = df_within_2sd_k, df_outside_2sd_k = df_outside_2sd_k) list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k)
} }
# Ensure all plots are saved and printed to PDF # Ensure all plots are saved and printed to PDF
@@ -289,178 +289,88 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
return(df_strains) return(df_strains)
} }
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) { df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables) {
# Initialize the dataframe # Calculate all necessary statistics and shifts in one step
interaction_scores <- df %>% df_stats_interaction_all <- df %>%
distinct(OrfRep) %>% group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
mutate( summarise(
Gene = NA, N = n(),
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, NG = sum(NG, na.rm = TRUE),
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, SM = sum(SM, na.rm = TRUE),
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, raw_shift_l = mean(L, na.rm = TRUE) - background_means$L,
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, raw_shift_k = mean(K, na.rm = TRUE) - background_means$K,
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, raw_shift_r = mean(r, na.rm = TRUE) - background_means$r,
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC,
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
NG = NA, DB = NA, SM = NA z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
) z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1],
exp_l = background_means$L + raw_shift_l,
exp_k = background_means$K + raw_shift_k,
exp_r = background_means$r + raw_shift_r,
exp_auc = background_means$AUC + raw_shift_auc,
delta_l = mean(L, na.rm = TRUE) - exp_l,
delta_k = mean(K, na.rm = TRUE) - exp_k,
delta_r = mean(r, na.rm = TRUE) - exp_r,
delta_auc = mean(AUC, na.rm = TRUE) - exp_auc,
zscore_l = delta_l / df_stats_by_l$L_sd,
zscore_k = delta_k / df_stats_by_k$K_sd,
zscore_r = delta_r / df_stats_by_r$r_sd,
zscore_auc = delta_auc / df_stats_by_auc$AUC_sd,
sum_z_score_l = sum(zscore_l, na.rm = TRUE),
avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_k = sum(zscore_k, na.rm = TRUE),
avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_r = sum(zscore_r, na.rm = TRUE),
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
) %>%
ungroup()
# The list to hold all of the combined interactions # Calculate linear models and interaction scores
interaction_scores_list <- list() interaction_scores <- df_stats_interaction_all %>%
group_by(OrfRep, Gene, num) %>%
summarise(
lm_l = list(lm(delta_l ~ conc_num_factor)),
lm_k = list(lm(delta_k ~ conc_num_factor)),
lm_r = list(lm(delta_r ~ conc_num_factor)),
lm_auc = list(lm(delta_auc ~ conc_num_factor)),
lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1],
lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1],
lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1],
lm_Score_AUC = max_conc * coef(lm_auc[[1]])[2] + coef(lm_auc[[1]])[1],
R_Squared_L = summary(lm_l[[1]])$r.squared,
R_Squared_K = summary(lm_k[[1]])$r.squared,
R_Squared_r = summary(lm_r[[1]])$r.squared,
R_Squared_AUC = summary(lm_auc[[1]])$r.squared,
Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE),
Avg_Zscore_L = avg_zscore_l[1],
Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE),
Avg_Zscore_K = avg_zscore_k[1],
Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE),
Avg_Zscore_r = avg_zscore_r[1],
Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE),
Avg_Zscore_AUC = avg_zscore_auc[1],
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
) %>%
ungroup()
# TODO is this necessary? # Ensure interaction_scores has only the specified columns
# Initialize the dataframe
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k,
raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r,
raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc,
NG, DB, SM)
for (i in seq_len(nrow(interaction_scores))) { return(list(zscores_calculations = df_stats_interaction_all, zscores_interactions = interaction_scores))
gene_sel <- interaction_scores$OrfRep[i]
df_gene_sel <- df %>% filter(OrfRep == gene_sel)
df_stats_interaction <- df_gene_sel %>%
group_by(OrfRep, Gene, conc_num, conc_num_factor) %>%
summarise(
N = n(),
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
NG = sum(NG, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
) %>%
ungroup()
shifts <- df_stats_interaction %>%
mutate(
raw_shift_l = L_mean - background_means$L,
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
raw_shift_k = K_mean - background_means$K,
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
raw_shift_r = r_mean - background_means$r,
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
raw_shift_auc = AUC_mean - background_means$AUC,
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1]
)
linear_models <- shifts %>%
summarise(
lm_l = list(lm(L_mean ~ conc_num_factor)),
lm_k = list(lm(K_mean ~ conc_num_factor)),
lm_r = list(lm(r_mean ~ conc_num_factor)),
lm_auc = list(lm(AUC_mean ~ conc_num_factor))
)
z_scores <- shifts %>%
mutate(
exp_l = background_means$L + raw_shift_l,
delta_l = L_mean - exp_l,
exp_k = background_means$K + raw_shift_k,
delta_k = K_mean - exp_k,
exp_r = background_means$r + raw_shift_r,
delta_r = r_mean - exp_r,
exp_auc = background_means$AUC + raw_shift_auc,
delta_auc = AUC_mean - exp_auc,
zscore_l = delta_l / df_stats_by_l$L_sd,
zscore_k = delta_k / df_stats_by_k$K_sd,
zscore_r = delta_r / df_stats_by_r$r_sd,
zscore_auc = delta_auc / df_stats_by_auc$AUC_sd,
sum_z_score_l = sum(zscore_l, na.rm = TRUE),
avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_k = sum(zscore_k, na.rm = TRUE),
avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_r = sum(zscore_r, na.rm = TRUE),
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
)
# Update interaction_scores with calculated values
interaction_scores[i, ] <- interaction_scores[i, ] %>%
mutate(
Raw_Shift_L = shifts$raw_shift_l[1],
Z_Shift_L = shifts$z_shift_l[1],
lm_Score_L = max_conc * coef(linear_models$lm_l)[2] + coef(linear_models$lm_l)[1],
R_Squared_L = summary(linear_models$lm_l)$r.squared,
Sum_Z_Score_L = z_scores$sum_z_score_l[1],
Avg_Zscore_L = z_scores$avg_zscore_l[1],
Raw_Shift_K = shifts$raw_shift_k[1],
Z_Shift_K = shifts$z_shift_k[1],
lm_Score_K = max_conc * coef(linear_models$lm_k)[2] + coef(linear_models$lm_k)[1],
R_Squared_K = summary(linear_models$lm_k)$r.squared,
Sum_Z_Score_K = z_scores$sum_z_score_k[1],
Avg_Zscore_K = z_scores$avg_zscore_k[1],
Raw_Shift_r = shifts$raw_shift_r[1],
Z_Shift_r = shifts$z_shift_r[1],
lm_Score_r = max_conc * coef(linear_models$lm_r)[2] + coef(linear_models$lm_r)[1],
R_Squared_r = summary(linear_models$lm_r)$r.squared,
Sum_Z_Score_r = z_scores$sum_z_score_r[1],
Avg_Zscore_r = z_scores$avg_zscore_r[1],
Raw_Shift_AUC = shifts$raw_shift_auc[1],
Z_Shift_AUC = shifts$z_shift_auc[1],
lm_Score_AUC = max_conc * coef(linear_models$lm_auc)[2] + coef(linear_models$lm_auc)[1],
R_Squared_AUC = summary(linear_models$lm_auc)$r.squared,
Sum_Z_Score_AUC = z_scores$sum_z_score_auc[1],
Avg_Zscore_AUC = z_scores$avg_zscore_auc[1],
NG = sum(df_stats_interaction$NG, na.rm = TRUE),
DB = sum(df_stats_interaction$DB, na.rm = TRUE),
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
)
interaction_scores_list[[i]] <- interaction_scores
}
return(interaction_scores_list)
}
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
# Arrange the interaction scores by Z_lm_L and NG
interaction_scores <- interaction_scores %>%
arrange(desc(Z_lm_L), desc(NG))
filters <- list(
list(name = "_ZScores_Interaction.csv",
filter = interaction_scores),
list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv",
filter = filter(interaction_scores, Avg_Zscore_L >= 2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv",
filter = filter(interaction_scores, Avg_Zscore_K <= -2)),
list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv",
filter = filter(interaction_scores, Avg_Zscore_L <= -2)),
list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv",
filter = filter(interaction_scores, Avg_Zscore_K >= 2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",
filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",
filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)),
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",
filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)),
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",
filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)),
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",
filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)),
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",
filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv",
filter = filter(interaction_scores, Z_lm_L >= 2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv",
filter = filter(interaction_scores, Z_lm_K <= -2)),
list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv",
filter = filter(interaction_scores, Z_lm_L <= -2)),
list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv",
filter = filter(interaction_scores, Z_lm_K >= 2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",
filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)),
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",
filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2))
)
# Iterate over each filter and save the corresponding CSV file
for (item in filters) {
file_name <- paste0(prefix, item$name)
write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE)
}
} }
@@ -512,11 +422,11 @@ main <- function() {
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
# Calculate the pre-background stats once # Calculate the pre-background stats once
df_stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
df_stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars)) stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars))
df_stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars)) stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars))
df_stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars)) stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars))
df_stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars)) stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars))
# Process background strains # Process background strains
background_strains <- c("YDL227C") background_strains <- c("YDL227C")
@@ -535,11 +445,11 @@ main <- function() {
filter(!is.na(L)) filter(!is.na(L))
# Recalculate summary statistics for the background strain # Recalculate summary statistics for the background strain
df_stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
df_stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars)) stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars))
df_stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars)) stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars))
df_stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars)) stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars))
df_stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars)) stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars))
# Backup in case previous block explodes # Backup in case previous block explodes
# Combine all summary statistics into one dataframe # Combine all summary statistics into one dataframe
@@ -549,30 +459,30 @@ main <- function() {
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor")) # left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
# Save the summary statistics for the background strain # Save the summary statistics for the background strain
write.csv(df_stats_background, write.csv(stats_background,
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE) row.names = FALSE)
# Calculate L values within and outside 2SD of K # Calculate L values within and outside 2SD of K
results_2sd <- calculate_l_2sd_of_k(df_background, df_stats_by_k_background) results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
df_within_2sd_k <- results_2sd$df_within_2sd_k within_2sd_k <- results_2sd$within_2sd_k
df_outside_2sd_k <- results_2sd$df_outside_2sd_k outside_2sd_k <- results_2sd$outside_2sd_k
# Calculate summary statistics for L within and outside 2SD of K # Calculate summary statistics for L within and outside 2SD of K
df_stats_by_l_within_2sd_k <- calculate_summary_stats(df_within_2sd_k, "L") l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L")
write.csv(df_stats_by_l_within_2sd_k, write.csv(l_within_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
row.names = FALSE) row.names = FALSE)
df_stats_by_l_outside_2sd_k <- calculate_summary_stats(df_outside_2sd_k, "L") l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L")
write.csv(df_stats_by_l_outside_2sd_k, write.csv(l_outside_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
row.names = FALSE) row.names = FALSE)
generate_and_save_plots(df_outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
background_means <- calculate_background_means(df_stats_by_l_background, background_means <- calculate_background_means(stats_by_l_background,
df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background) stats_by_k_background, stats_by_r_background, stats_by_auc_background)
df_reference <- df_na %>% df_reference <- df_na %>%
filter(OrfRep == strain) %>% filter(OrfRep == strain) %>%
@@ -582,52 +492,81 @@ main <- function() {
filter(OrfRep != strain) %>% filter(OrfRep != strain) %>%
mutate(SM = 0) mutate(SM = 0)
df_reference_strains <- process_strains(df_reference, df_stats_by_l_within_2sd_k, strain, out_dir) df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir)
write.csv(df_reference_strains, df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir)
file = file.path(output_dir, paste0("Processed_reference_strains_", strain, ".csv")),
row.names = FALSE)
df_deletion_strains <- process_strains(df_deletion, df_stats_by_l_within_2sd_k, strain, out_dir) # TODO: we are mutating OrfRep to make it more unique, is this a sensible approach?
write.csv(df_deletion_strains, # Should probably create a new column?
file = file.path(output_dir, paste0("Processed_deletion_strains_", strain, ".csv")),
row.names = FALSE)
# TODO: we are mutating OrfRep to make it more unique, is this sensible?
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
df_reference_strains <- df_reference_strains %>% # df_reference_strains <- df_reference_strains %>%
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
# num_genes <- length(unique(df_reference_strains$OrfRep)) # num_genes <- length(unique(df_reference_strains$OrfRep))
# print(num_genes) # print(num_genes)
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l, reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k,
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l,
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) # Process deletion strains
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k,
stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
zscores_calculations_reference <- reference_results$zscores_calculations
zscores_interactions_reference <- reference_results$zscores_interactions
zscores_calculations <- deletion_results$zscores_calculations
zscores_interactions <- deletion_results$zscores_interactions
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
# Define conditions for enhancers and suppressors
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
# Subset data
enhancers_L <- zscores_interactions[enhancer_condition_L, ]
suppressors_L <- zscores_interactions[suppressor_condition_L, ]
enhancers_K <- zscores_interactions[enhancer_condition_K, ]
suppressors_K <- zscores_interactions[suppressor_condition_K, ]
# Save enhancers and suppressors
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
# Combine conditions for enhancers and suppressors
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
# write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) # Save combined enhancers and suppressors
write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
# Filter and save interaction scores for reference strains # Handle linear model based enhancers and suppressors
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF") enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
# Filter and save interaction scores for deletion strains enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
filter_and_save_interaction_scores(interaction_scores_deletion, out_dir, "Deletion") suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
# Save linear model based enhancers and suppressors
write.csv(enhancers_lm_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE)
write.csv(enhancers_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
# Generate plots for interaction scores # Generate plots for interaction scores
# plot_interaction_scores(interaction_scores_reference, out_dir) # Assuming plot_interaction_scores is your plotting function
}) })
}) })
} }