Auto-commit: apps/r/calculate_interaction_zscores5.R
This commit is contained in:
@@ -238,7 +238,7 @@ calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
|
||||
df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
|
||||
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
|
||||
|
||||
list(df_within_2sd_k = df_within_2sd_k, df_outside_2sd_k = df_outside_2sd_k)
|
||||
list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k)
|
||||
}
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
@@ -289,77 +289,34 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
|
||||
return(df_strains)
|
||||
}
|
||||
|
||||
|
||||
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) {
|
||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables) {
|
||||
|
||||
# Initialize the dataframe
|
||||
interaction_scores <- df %>%
|
||||
distinct(OrfRep) %>%
|
||||
mutate(
|
||||
Gene = NA,
|
||||
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
|
||||
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
|
||||
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
|
||||
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
|
||||
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
|
||||
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
|
||||
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
|
||||
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
|
||||
NG = NA, DB = NA, SM = NA
|
||||
)
|
||||
|
||||
# The list to hold all of the combined interactions
|
||||
interaction_scores_list <- list()
|
||||
|
||||
# TODO is this necessary?
|
||||
# Initialize the dataframe
|
||||
interaction_scores <- interaction_scores %>%
|
||||
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA
|
||||
|
||||
for (i in seq_len(nrow(interaction_scores))) {
|
||||
gene_sel <- interaction_scores$OrfRep[i]
|
||||
df_gene_sel <- df %>% filter(OrfRep == gene_sel)
|
||||
|
||||
df_stats_interaction <- df_gene_sel %>%
|
||||
group_by(OrfRep, Gene, conc_num, conc_num_factor) %>%
|
||||
# Calculate all necessary statistics and shifts in one step
|
||||
df_stats_interaction_all <- df %>%
|
||||
group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
|
||||
summarise(
|
||||
N = n(),
|
||||
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE)
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
shifts <- df_stats_interaction %>%
|
||||
mutate(
|
||||
raw_shift_l = L_mean - background_means$L,
|
||||
SM = sum(SM, na.rm = TRUE),
|
||||
raw_shift_l = mean(L, na.rm = TRUE) - background_means$L,
|
||||
raw_shift_k = mean(K, na.rm = TRUE) - background_means$K,
|
||||
raw_shift_r = mean(r, na.rm = TRUE) - background_means$r,
|
||||
raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC,
|
||||
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
|
||||
raw_shift_k = K_mean - background_means$K,
|
||||
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
|
||||
raw_shift_r = r_mean - background_means$r,
|
||||
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
|
||||
raw_shift_auc = AUC_mean - background_means$AUC,
|
||||
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1]
|
||||
)
|
||||
|
||||
linear_models <- shifts %>%
|
||||
summarise(
|
||||
lm_l = list(lm(L_mean ~ conc_num_factor)),
|
||||
lm_k = list(lm(K_mean ~ conc_num_factor)),
|
||||
lm_r = list(lm(r_mean ~ conc_num_factor)),
|
||||
lm_auc = list(lm(AUC_mean ~ conc_num_factor))
|
||||
)
|
||||
|
||||
z_scores <- shifts %>%
|
||||
mutate(
|
||||
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1],
|
||||
exp_l = background_means$L + raw_shift_l,
|
||||
delta_l = L_mean - exp_l,
|
||||
exp_k = background_means$K + raw_shift_k,
|
||||
delta_k = K_mean - exp_k,
|
||||
exp_r = background_means$r + raw_shift_r,
|
||||
delta_r = r_mean - exp_r,
|
||||
exp_auc = background_means$AUC + raw_shift_auc,
|
||||
delta_auc = AUC_mean - exp_auc,
|
||||
delta_l = mean(L, na.rm = TRUE) - exp_l,
|
||||
delta_k = mean(K, na.rm = TRUE) - exp_k,
|
||||
delta_r = mean(r, na.rm = TRUE) - exp_r,
|
||||
delta_auc = mean(AUC, na.rm = TRUE) - exp_auc,
|
||||
zscore_l = delta_l / df_stats_by_l$L_sd,
|
||||
zscore_k = delta_k / df_stats_by_k$K_sd,
|
||||
zscore_r = delta_r / df_stats_by_r$r_sd,
|
||||
@@ -372,95 +329,48 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
||||
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
|
||||
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
|
||||
)
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
# Update interaction_scores with calculated values
|
||||
interaction_scores[i, ] <- interaction_scores[i, ] %>%
|
||||
mutate(
|
||||
Raw_Shift_L = shifts$raw_shift_l[1],
|
||||
Z_Shift_L = shifts$z_shift_l[1],
|
||||
lm_Score_L = max_conc * coef(linear_models$lm_l)[2] + coef(linear_models$lm_l)[1],
|
||||
R_Squared_L = summary(linear_models$lm_l)$r.squared,
|
||||
Sum_Z_Score_L = z_scores$sum_z_score_l[1],
|
||||
Avg_Zscore_L = z_scores$avg_zscore_l[1],
|
||||
Raw_Shift_K = shifts$raw_shift_k[1],
|
||||
Z_Shift_K = shifts$z_shift_k[1],
|
||||
lm_Score_K = max_conc * coef(linear_models$lm_k)[2] + coef(linear_models$lm_k)[1],
|
||||
R_Squared_K = summary(linear_models$lm_k)$r.squared,
|
||||
Sum_Z_Score_K = z_scores$sum_z_score_k[1],
|
||||
Avg_Zscore_K = z_scores$avg_zscore_k[1],
|
||||
Raw_Shift_r = shifts$raw_shift_r[1],
|
||||
Z_Shift_r = shifts$z_shift_r[1],
|
||||
lm_Score_r = max_conc * coef(linear_models$lm_r)[2] + coef(linear_models$lm_r)[1],
|
||||
R_Squared_r = summary(linear_models$lm_r)$r.squared,
|
||||
Sum_Z_Score_r = z_scores$sum_z_score_r[1],
|
||||
Avg_Zscore_r = z_scores$avg_zscore_r[1],
|
||||
Raw_Shift_AUC = shifts$raw_shift_auc[1],
|
||||
Z_Shift_AUC = shifts$z_shift_auc[1],
|
||||
lm_Score_AUC = max_conc * coef(linear_models$lm_auc)[2] + coef(linear_models$lm_auc)[1],
|
||||
R_Squared_AUC = summary(linear_models$lm_auc)$r.squared,
|
||||
Sum_Z_Score_AUC = z_scores$sum_z_score_auc[1],
|
||||
Avg_Zscore_AUC = z_scores$avg_zscore_auc[1],
|
||||
NG = sum(df_stats_interaction$NG, na.rm = TRUE),
|
||||
DB = sum(df_stats_interaction$DB, na.rm = TRUE),
|
||||
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
|
||||
)
|
||||
# Calculate linear models and interaction scores
|
||||
interaction_scores <- df_stats_interaction_all %>%
|
||||
group_by(OrfRep, Gene, num) %>%
|
||||
summarise(
|
||||
lm_l = list(lm(delta_l ~ conc_num_factor)),
|
||||
lm_k = list(lm(delta_k ~ conc_num_factor)),
|
||||
lm_r = list(lm(delta_r ~ conc_num_factor)),
|
||||
lm_auc = list(lm(delta_auc ~ conc_num_factor)),
|
||||
lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1],
|
||||
lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1],
|
||||
lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1],
|
||||
lm_Score_AUC = max_conc * coef(lm_auc[[1]])[2] + coef(lm_auc[[1]])[1],
|
||||
R_Squared_L = summary(lm_l[[1]])$r.squared,
|
||||
R_Squared_K = summary(lm_k[[1]])$r.squared,
|
||||
R_Squared_r = summary(lm_r[[1]])$r.squared,
|
||||
R_Squared_AUC = summary(lm_auc[[1]])$r.squared,
|
||||
Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE),
|
||||
Avg_Zscore_L = avg_zscore_l[1],
|
||||
Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE),
|
||||
Avg_Zscore_K = avg_zscore_k[1],
|
||||
Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE),
|
||||
Avg_Zscore_r = avg_zscore_r[1],
|
||||
Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE),
|
||||
Avg_Zscore_AUC = avg_zscore_auc[1],
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
DB = sum(DB, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE)
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
interaction_scores_list[[i]] <- interaction_scores
|
||||
}
|
||||
|
||||
return(interaction_scores_list)
|
||||
}
|
||||
|
||||
|
||||
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
|
||||
|
||||
# Arrange the interaction scores by Z_lm_L and NG
|
||||
# Ensure interaction_scores has only the specified columns
|
||||
interaction_scores <- interaction_scores %>%
|
||||
arrange(desc(Z_lm_L), desc(NG))
|
||||
select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
|
||||
raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k,
|
||||
raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r,
|
||||
raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc,
|
||||
NG, DB, SM)
|
||||
|
||||
filters <- list(
|
||||
list(name = "_ZScores_Interaction.csv",
|
||||
filter = interaction_scores),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_L >= 2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_K <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_L <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_K >= 2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",
|
||||
filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)),
|
||||
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",
|
||||
filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)),
|
||||
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",
|
||||
filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)),
|
||||
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",
|
||||
filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)),
|
||||
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",
|
||||
filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_L >= 2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_K <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_L <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_K >= 2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)),
|
||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",
|
||||
filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2))
|
||||
)
|
||||
|
||||
# Iterate over each filter and save the corresponding CSV file
|
||||
for (item in filters) {
|
||||
file_name <- paste0(prefix, item$name)
|
||||
write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE)
|
||||
}
|
||||
return(list(zscores_calculations = df_stats_interaction_all, zscores_interactions = interaction_scores))
|
||||
}
|
||||
|
||||
|
||||
@@ -512,11 +422,11 @@ main <- function() {
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
|
||||
# Calculate the pre-background stats once
|
||||
df_stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
df_stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars))
|
||||
df_stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars))
|
||||
df_stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars))
|
||||
df_stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||
stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars))
|
||||
stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars))
|
||||
stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars))
|
||||
stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||
|
||||
# Process background strains
|
||||
background_strains <- c("YDL227C")
|
||||
@@ -535,11 +445,11 @@ main <- function() {
|
||||
filter(!is.na(L))
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
df_stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
df_stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars))
|
||||
df_stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars))
|
||||
df_stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars))
|
||||
df_stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars))
|
||||
stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars))
|
||||
stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars))
|
||||
stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||
|
||||
# Backup in case previous block explodes
|
||||
# Combine all summary statistics into one dataframe
|
||||
@@ -549,30 +459,30 @@ main <- function() {
|
||||
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
|
||||
# Save the summary statistics for the background strain
|
||||
write.csv(df_stats_background,
|
||||
write.csv(stats_background,
|
||||
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
# Calculate L values within and outside 2SD of K
|
||||
results_2sd <- calculate_l_2sd_of_k(df_background, df_stats_by_k_background)
|
||||
df_within_2sd_k <- results_2sd$df_within_2sd_k
|
||||
df_outside_2sd_k <- results_2sd$df_outside_2sd_k
|
||||
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
|
||||
within_2sd_k <- results_2sd$within_2sd_k
|
||||
outside_2sd_k <- results_2sd$outside_2sd_k
|
||||
|
||||
# Calculate summary statistics for L within and outside 2SD of K
|
||||
df_stats_by_l_within_2sd_k <- calculate_summary_stats(df_within_2sd_k, "L")
|
||||
write.csv(df_stats_by_l_within_2sd_k,
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L")
|
||||
write.csv(l_within_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
df_stats_by_l_outside_2sd_k <- calculate_summary_stats(df_outside_2sd_k, "L")
|
||||
write.csv(df_stats_by_l_outside_2sd_k,
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L")
|
||||
write.csv(l_outside_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
generate_and_save_plots(df_outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||
|
||||
background_means <- calculate_background_means(df_stats_by_l_background,
|
||||
df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background)
|
||||
background_means <- calculate_background_means(stats_by_l_background,
|
||||
stats_by_k_background, stats_by_r_background, stats_by_auc_background)
|
||||
|
||||
df_reference <- df_na %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
@@ -582,52 +492,81 @@ main <- function() {
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
df_reference_strains <- process_strains(df_reference, df_stats_by_l_within_2sd_k, strain, out_dir)
|
||||
write.csv(df_reference_strains,
|
||||
file = file.path(output_dir, paste0("Processed_reference_strains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir)
|
||||
df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir)
|
||||
|
||||
df_deletion_strains <- process_strains(df_deletion, df_stats_by_l_within_2sd_k, strain, out_dir)
|
||||
write.csv(df_deletion_strains,
|
||||
file = file.path(output_dir, paste0("Processed_deletion_strains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
# TODO: we are mutating OrfRep to make it more unique, is this sensible?
|
||||
# TODO: we are mutating OrfRep to make it more unique, is this a sensible approach?
|
||||
# Should probably create a new column?
|
||||
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
||||
df_reference_strains <- df_reference_strains %>%
|
||||
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||
# df_reference_strains <- df_reference_strains %>%
|
||||
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||
|
||||
# num_genes <- length(unique(df_reference_strains$OrfRep))
|
||||
# print(num_genes)
|
||||
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l,
|
||||
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||
interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l,
|
||||
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||
reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k,
|
||||
stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||
|
||||
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||
# Process deletion strains
|
||||
deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k,
|
||||
stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||
|
||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||
zscores_interactions_reference <- reference_results$zscores_interactions
|
||||
zscores_calculations <- deletion_results$zscores_calculations
|
||||
zscores_interactions <- deletion_results$zscores_interactions
|
||||
|
||||
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||
|
||||
# Define conditions for enhancers and suppressors
|
||||
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
|
||||
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
|
||||
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
|
||||
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
|
||||
|
||||
# Subset data
|
||||
enhancers_L <- zscores_interactions[enhancer_condition_L, ]
|
||||
suppressors_L <- zscores_interactions[suppressor_condition_L, ]
|
||||
enhancers_K <- zscores_interactions[enhancer_condition_K, ]
|
||||
suppressors_K <- zscores_interactions[suppressor_condition_K, ]
|
||||
|
||||
# Save enhancers and suppressors
|
||||
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# Combine conditions for enhancers and suppressors
|
||||
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||
enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||
|
||||
# write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||
# Save combined enhancers and suppressors
|
||||
write.csv(enhancers_and_suppressors_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_and_suppressors_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# Filter and save interaction scores for reference strains
|
||||
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")
|
||||
|
||||
# Filter and save interaction scores for deletion strains
|
||||
filter_and_save_interaction_scores(interaction_scores_deletion, out_dir, "Deletion")
|
||||
# Handle linear model based enhancers and suppressors
|
||||
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
|
||||
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
|
||||
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
|
||||
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
|
||||
|
||||
# Save linear model based enhancers and suppressors
|
||||
write.csv(enhancers_lm_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_lm_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
|
||||
|
||||
# Generate plots for interaction scores
|
||||
# plot_interaction_scores(interaction_scores_reference, out_dir) # Assuming plot_interaction_scores is your plotting function
|
||||
|
||||
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user