Auto-commit: apps/r/calculate_interaction_zscores5.R
This commit is contained in:
@@ -238,7 +238,7 @@ calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
|
|||||||
df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
|
df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
|
||||||
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
|
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
|
||||||
|
|
||||||
list(df_within_2sd_k = df_within_2sd_k, df_outside_2sd_k = df_outside_2sd_k)
|
list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Ensure all plots are saved and printed to PDF
|
# Ensure all plots are saved and printed to PDF
|
||||||
@@ -289,178 +289,88 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
|
|||||||
return(df_strains)
|
return(df_strains)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
||||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) {
|
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables) {
|
||||||
|
|
||||||
# Initialize the dataframe
|
# Calculate all necessary statistics and shifts in one step
|
||||||
interaction_scores <- df %>%
|
df_stats_interaction_all <- df %>%
|
||||||
distinct(OrfRep) %>%
|
group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
|
||||||
mutate(
|
summarise(
|
||||||
Gene = NA,
|
N = n(),
|
||||||
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
|
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
||||||
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
|
NG = sum(NG, na.rm = TRUE),
|
||||||
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
|
SM = sum(SM, na.rm = TRUE),
|
||||||
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
|
raw_shift_l = mean(L, na.rm = TRUE) - background_means$L,
|
||||||
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
|
raw_shift_k = mean(K, na.rm = TRUE) - background_means$K,
|
||||||
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
|
raw_shift_r = mean(r, na.rm = TRUE) - background_means$r,
|
||||||
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
|
raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC,
|
||||||
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
|
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
|
||||||
NG = NA, DB = NA, SM = NA
|
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
|
||||||
)
|
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
|
||||||
|
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1],
|
||||||
# The list to hold all of the combined interactions
|
exp_l = background_means$L + raw_shift_l,
|
||||||
interaction_scores_list <- list()
|
exp_k = background_means$K + raw_shift_k,
|
||||||
|
exp_r = background_means$r + raw_shift_r,
|
||||||
# TODO is this necessary?
|
exp_auc = background_means$AUC + raw_shift_auc,
|
||||||
# Initialize the dataframe
|
delta_l = mean(L, na.rm = TRUE) - exp_l,
|
||||||
interaction_scores <- interaction_scores %>%
|
delta_k = mean(K, na.rm = TRUE) - exp_k,
|
||||||
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA
|
delta_r = mean(r, na.rm = TRUE) - exp_r,
|
||||||
|
delta_auc = mean(AUC, na.rm = TRUE) - exp_auc,
|
||||||
for (i in seq_len(nrow(interaction_scores))) {
|
zscore_l = delta_l / df_stats_by_l$L_sd,
|
||||||
gene_sel <- interaction_scores$OrfRep[i]
|
zscore_k = delta_k / df_stats_by_k$K_sd,
|
||||||
df_gene_sel <- df %>% filter(OrfRep == gene_sel)
|
zscore_r = delta_r / df_stats_by_r$r_sd,
|
||||||
|
zscore_auc = delta_auc / df_stats_by_auc$AUC_sd,
|
||||||
df_stats_interaction <- df_gene_sel %>%
|
sum_z_score_l = sum(zscore_l, na.rm = TRUE),
|
||||||
group_by(OrfRep, Gene, conc_num, conc_num_factor) %>%
|
avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||||
summarise(
|
sum_z_score_k = sum(zscore_k, na.rm = TRUE),
|
||||||
N = n(),
|
avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||||
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
sum_z_score_r = sum(zscore_r, na.rm = TRUE),
|
||||||
NG = sum(NG, na.rm = TRUE),
|
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||||
SM = sum(SM, na.rm = TRUE)
|
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
|
||||||
) %>%
|
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
|
||||||
ungroup()
|
) %>%
|
||||||
|
ungroup()
|
||||||
shifts <- df_stats_interaction %>%
|
|
||||||
mutate(
|
|
||||||
raw_shift_l = L_mean - background_means$L,
|
|
||||||
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
|
|
||||||
raw_shift_k = K_mean - background_means$K,
|
|
||||||
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
|
|
||||||
raw_shift_r = r_mean - background_means$r,
|
|
||||||
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
|
|
||||||
raw_shift_auc = AUC_mean - background_means$AUC,
|
|
||||||
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1]
|
|
||||||
)
|
|
||||||
|
|
||||||
linear_models <- shifts %>%
|
|
||||||
summarise(
|
|
||||||
lm_l = list(lm(L_mean ~ conc_num_factor)),
|
|
||||||
lm_k = list(lm(K_mean ~ conc_num_factor)),
|
|
||||||
lm_r = list(lm(r_mean ~ conc_num_factor)),
|
|
||||||
lm_auc = list(lm(AUC_mean ~ conc_num_factor))
|
|
||||||
)
|
|
||||||
|
|
||||||
z_scores <- shifts %>%
|
|
||||||
mutate(
|
|
||||||
exp_l = background_means$L + raw_shift_l,
|
|
||||||
delta_l = L_mean - exp_l,
|
|
||||||
exp_k = background_means$K + raw_shift_k,
|
|
||||||
delta_k = K_mean - exp_k,
|
|
||||||
exp_r = background_means$r + raw_shift_r,
|
|
||||||
delta_r = r_mean - exp_r,
|
|
||||||
exp_auc = background_means$AUC + raw_shift_auc,
|
|
||||||
delta_auc = AUC_mean - exp_auc,
|
|
||||||
zscore_l = delta_l / df_stats_by_l$L_sd,
|
|
||||||
zscore_k = delta_k / df_stats_by_k$K_sd,
|
|
||||||
zscore_r = delta_r / df_stats_by_r$r_sd,
|
|
||||||
zscore_auc = delta_auc / df_stats_by_auc$AUC_sd,
|
|
||||||
sum_z_score_l = sum(zscore_l, na.rm = TRUE),
|
|
||||||
avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
|
|
||||||
sum_z_score_k = sum(zscore_k, na.rm = TRUE),
|
|
||||||
avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
|
|
||||||
sum_z_score_r = sum(zscore_r, na.rm = TRUE),
|
|
||||||
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
|
|
||||||
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
|
|
||||||
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update interaction_scores with calculated values
|
|
||||||
interaction_scores[i, ] <- interaction_scores[i, ] %>%
|
|
||||||
mutate(
|
|
||||||
Raw_Shift_L = shifts$raw_shift_l[1],
|
|
||||||
Z_Shift_L = shifts$z_shift_l[1],
|
|
||||||
lm_Score_L = max_conc * coef(linear_models$lm_l)[2] + coef(linear_models$lm_l)[1],
|
|
||||||
R_Squared_L = summary(linear_models$lm_l)$r.squared,
|
|
||||||
Sum_Z_Score_L = z_scores$sum_z_score_l[1],
|
|
||||||
Avg_Zscore_L = z_scores$avg_zscore_l[1],
|
|
||||||
Raw_Shift_K = shifts$raw_shift_k[1],
|
|
||||||
Z_Shift_K = shifts$z_shift_k[1],
|
|
||||||
lm_Score_K = max_conc * coef(linear_models$lm_k)[2] + coef(linear_models$lm_k)[1],
|
|
||||||
R_Squared_K = summary(linear_models$lm_k)$r.squared,
|
|
||||||
Sum_Z_Score_K = z_scores$sum_z_score_k[1],
|
|
||||||
Avg_Zscore_K = z_scores$avg_zscore_k[1],
|
|
||||||
Raw_Shift_r = shifts$raw_shift_r[1],
|
|
||||||
Z_Shift_r = shifts$z_shift_r[1],
|
|
||||||
lm_Score_r = max_conc * coef(linear_models$lm_r)[2] + coef(linear_models$lm_r)[1],
|
|
||||||
R_Squared_r = summary(linear_models$lm_r)$r.squared,
|
|
||||||
Sum_Z_Score_r = z_scores$sum_z_score_r[1],
|
|
||||||
Avg_Zscore_r = z_scores$avg_zscore_r[1],
|
|
||||||
Raw_Shift_AUC = shifts$raw_shift_auc[1],
|
|
||||||
Z_Shift_AUC = shifts$z_shift_auc[1],
|
|
||||||
lm_Score_AUC = max_conc * coef(linear_models$lm_auc)[2] + coef(linear_models$lm_auc)[1],
|
|
||||||
R_Squared_AUC = summary(linear_models$lm_auc)$r.squared,
|
|
||||||
Sum_Z_Score_AUC = z_scores$sum_z_score_auc[1],
|
|
||||||
Avg_Zscore_AUC = z_scores$avg_zscore_auc[1],
|
|
||||||
NG = sum(df_stats_interaction$NG, na.rm = TRUE),
|
|
||||||
DB = sum(df_stats_interaction$DB, na.rm = TRUE),
|
|
||||||
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
|
|
||||||
)
|
|
||||||
|
|
||||||
interaction_scores_list[[i]] <- interaction_scores
|
|
||||||
}
|
|
||||||
|
|
||||||
return(interaction_scores_list)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
|
|
||||||
|
|
||||||
# Arrange the interaction scores by Z_lm_L and NG
|
|
||||||
interaction_scores <- interaction_scores %>%
|
|
||||||
arrange(desc(Z_lm_L), desc(NG))
|
|
||||||
|
|
||||||
filters <- list(
|
# Calculate linear models and interaction scores
|
||||||
list(name = "_ZScores_Interaction.csv",
|
interaction_scores <- df_stats_interaction_all %>%
|
||||||
filter = interaction_scores),
|
group_by(OrfRep, Gene, num) %>%
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv",
|
summarise(
|
||||||
filter = filter(interaction_scores, Avg_Zscore_L >= 2)),
|
lm_l = list(lm(delta_l ~ conc_num_factor)),
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv",
|
lm_k = list(lm(delta_k ~ conc_num_factor)),
|
||||||
filter = filter(interaction_scores, Avg_Zscore_K <= -2)),
|
lm_r = list(lm(delta_r ~ conc_num_factor)),
|
||||||
list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv",
|
lm_auc = list(lm(delta_auc ~ conc_num_factor)),
|
||||||
filter = filter(interaction_scores, Avg_Zscore_L <= -2)),
|
lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1],
|
||||||
list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv",
|
lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1],
|
||||||
filter = filter(interaction_scores, Avg_Zscore_K >= 2)),
|
lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1],
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",
|
lm_Score_AUC = max_conc * coef(lm_auc[[1]])[2] + coef(lm_auc[[1]])[1],
|
||||||
filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)),
|
R_Squared_L = summary(lm_l[[1]])$r.squared,
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",
|
R_Squared_K = summary(lm_k[[1]])$r.squared,
|
||||||
filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)),
|
R_Squared_r = summary(lm_r[[1]])$r.squared,
|
||||||
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",
|
R_Squared_AUC = summary(lm_auc[[1]])$r.squared,
|
||||||
filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)),
|
Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE),
|
||||||
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",
|
Avg_Zscore_L = avg_zscore_l[1],
|
||||||
filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)),
|
Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE),
|
||||||
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",
|
Avg_Zscore_K = avg_zscore_k[1],
|
||||||
filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)),
|
Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE),
|
||||||
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",
|
Avg_Zscore_r = avg_zscore_r[1],
|
||||||
filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)),
|
Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE),
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv",
|
Avg_Zscore_AUC = avg_zscore_auc[1],
|
||||||
filter = filter(interaction_scores, Z_lm_L >= 2)),
|
NG = sum(NG, na.rm = TRUE),
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv",
|
DB = sum(DB, na.rm = TRUE),
|
||||||
filter = filter(interaction_scores, Z_lm_K <= -2)),
|
SM = sum(SM, na.rm = TRUE)
|
||||||
list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv",
|
) %>%
|
||||||
filter = filter(interaction_scores, Z_lm_L <= -2)),
|
ungroup()
|
||||||
list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv",
|
|
||||||
filter = filter(interaction_scores, Z_lm_K >= 2)),
|
# Ensure interaction_scores has only the specified columns
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",
|
interaction_scores <- interaction_scores %>%
|
||||||
filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)),
|
select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
|
||||||
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",
|
raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k,
|
||||||
filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2))
|
raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r,
|
||||||
)
|
raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc,
|
||||||
|
NG, DB, SM)
|
||||||
|
|
||||||
# Iterate over each filter and save the corresponding CSV file
|
return(list(zscores_calculations = df_stats_interaction_all, zscores_interactions = interaction_scores))
|
||||||
for (item in filters) {
|
|
||||||
file_name <- paste0(prefix, item$name)
|
|
||||||
write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -512,11 +422,11 @@ main <- function() {
|
|||||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Calculate the pre-background stats once
|
# Calculate the pre-background stats once
|
||||||
df_stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||||
df_stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars))
|
stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars))
|
||||||
df_stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars))
|
stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars))
|
||||||
df_stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars))
|
stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars))
|
||||||
df_stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars))
|
stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||||
|
|
||||||
# Process background strains
|
# Process background strains
|
||||||
background_strains <- c("YDL227C")
|
background_strains <- c("YDL227C")
|
||||||
@@ -535,11 +445,11 @@ main <- function() {
|
|||||||
filter(!is.na(L))
|
filter(!is.na(L))
|
||||||
|
|
||||||
# Recalculate summary statistics for the background strain
|
# Recalculate summary statistics for the background strain
|
||||||
df_stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||||
df_stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars))
|
stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars))
|
||||||
df_stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars))
|
stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars))
|
||||||
df_stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars))
|
stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars))
|
||||||
df_stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars))
|
stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars))
|
||||||
|
|
||||||
# Backup in case previous block explodes
|
# Backup in case previous block explodes
|
||||||
# Combine all summary statistics into one dataframe
|
# Combine all summary statistics into one dataframe
|
||||||
@@ -549,30 +459,30 @@ main <- function() {
|
|||||||
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
# left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||||
|
|
||||||
# Save the summary statistics for the background strain
|
# Save the summary statistics for the background strain
|
||||||
write.csv(df_stats_background,
|
write.csv(stats_background,
|
||||||
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
# Calculate L values within and outside 2SD of K
|
# Calculate L values within and outside 2SD of K
|
||||||
results_2sd <- calculate_l_2sd_of_k(df_background, df_stats_by_k_background)
|
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
|
||||||
df_within_2sd_k <- results_2sd$df_within_2sd_k
|
within_2sd_k <- results_2sd$within_2sd_k
|
||||||
df_outside_2sd_k <- results_2sd$df_outside_2sd_k
|
outside_2sd_k <- results_2sd$outside_2sd_k
|
||||||
|
|
||||||
# Calculate summary statistics for L within and outside 2SD of K
|
# Calculate summary statistics for L within and outside 2SD of K
|
||||||
df_stats_by_l_within_2sd_k <- calculate_summary_stats(df_within_2sd_k, "L")
|
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L")
|
||||||
write.csv(df_stats_by_l_within_2sd_k,
|
write.csv(l_within_2sd_k,
|
||||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
df_stats_by_l_outside_2sd_k <- calculate_summary_stats(df_outside_2sd_k, "L")
|
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L")
|
||||||
write.csv(df_stats_by_l_outside_2sd_k,
|
write.csv(l_outside_2sd_k,
|
||||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
generate_and_save_plots(df_outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||||
|
|
||||||
background_means <- calculate_background_means(df_stats_by_l_background,
|
background_means <- calculate_background_means(stats_by_l_background,
|
||||||
df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background)
|
stats_by_k_background, stats_by_r_background, stats_by_auc_background)
|
||||||
|
|
||||||
df_reference <- df_na %>%
|
df_reference <- df_na %>%
|
||||||
filter(OrfRep == strain) %>%
|
filter(OrfRep == strain) %>%
|
||||||
@@ -582,52 +492,81 @@ main <- function() {
|
|||||||
filter(OrfRep != strain) %>%
|
filter(OrfRep != strain) %>%
|
||||||
mutate(SM = 0)
|
mutate(SM = 0)
|
||||||
|
|
||||||
df_reference_strains <- process_strains(df_reference, df_stats_by_l_within_2sd_k, strain, out_dir)
|
df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir)
|
||||||
write.csv(df_reference_strains,
|
df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir)
|
||||||
file = file.path(output_dir, paste0("Processed_reference_strains_", strain, ".csv")),
|
|
||||||
row.names = FALSE)
|
|
||||||
|
|
||||||
df_deletion_strains <- process_strains(df_deletion, df_stats_by_l_within_2sd_k, strain, out_dir)
|
# TODO: we are mutating OrfRep to make it more unique, is this a sensible approach?
|
||||||
write.csv(df_deletion_strains,
|
# Should probably create a new column?
|
||||||
file = file.path(output_dir, paste0("Processed_deletion_strains_", strain, ".csv")),
|
|
||||||
row.names = FALSE)
|
|
||||||
|
|
||||||
# TODO: we are mutating OrfRep to make it more unique, is this sensible?
|
|
||||||
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
||||||
df_reference_strains <- df_reference_strains %>%
|
# df_reference_strains <- df_reference_strains %>%
|
||||||
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||||
|
|
||||||
# num_genes <- length(unique(df_reference_strains$OrfRep))
|
# num_genes <- length(unique(df_reference_strains$OrfRep))
|
||||||
# print(num_genes)
|
# print(num_genes)
|
||||||
|
|
||||||
variables <- c("L", "K", "r", "AUC")
|
variables <- c("L", "K", "r", "AUC")
|
||||||
interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l,
|
reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k,
|
||||||
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||||
interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l,
|
|
||||||
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
|
||||||
|
|
||||||
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
|
||||||
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
# Filter and save interaction scores for reference strains
|
|
||||||
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")
|
|
||||||
|
|
||||||
# Filter and save interaction scores for deletion strains
|
|
||||||
filter_and_save_interaction_scores(interaction_scores_deletion, out_dir, "Deletion")
|
|
||||||
|
|
||||||
|
# Process deletion strains
|
||||||
|
deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k,
|
||||||
|
stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||||
|
|
||||||
|
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||||
|
zscores_interactions_reference <- reference_results$zscores_interactions
|
||||||
|
zscores_calculations <- deletion_results$zscores_calculations
|
||||||
|
zscores_interactions <- deletion_results$zscores_interactions
|
||||||
|
|
||||||
|
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||||
|
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
|
||||||
|
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||||
|
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
# Define conditions for enhancers and suppressors
|
||||||
|
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
|
||||||
|
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
|
||||||
|
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
|
||||||
|
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
|
||||||
|
|
||||||
|
# Subset data
|
||||||
|
enhancers_L <- zscores_interactions[enhancer_condition_L, ]
|
||||||
|
suppressors_L <- zscores_interactions[suppressor_condition_L, ]
|
||||||
|
enhancers_K <- zscores_interactions[enhancer_condition_K, ]
|
||||||
|
suppressors_K <- zscores_interactions[suppressor_condition_K, ]
|
||||||
|
|
||||||
|
# Save enhancers and suppressors
|
||||||
|
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE)
|
||||||
|
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE)
|
||||||
|
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE)
|
||||||
|
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
# Combine conditions for enhancers and suppressors
|
||||||
|
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||||
|
enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||||
|
|
||||||
|
# Save combined enhancers and suppressors
|
||||||
|
write.csv(enhancers_and_suppressors_L,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE)
|
||||||
|
write.csv(enhancers_and_suppressors_K,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
# Handle linear model based enhancers and suppressors
|
||||||
|
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
|
||||||
|
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
|
||||||
|
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
|
||||||
|
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
|
||||||
|
|
||||||
|
# Save linear model based enhancers and suppressors
|
||||||
|
write.csv(enhancers_lm_L,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE)
|
||||||
|
write.csv(suppressors_lm_L,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE)
|
||||||
|
write.csv(enhancers_lm_K,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE)
|
||||||
|
write.csv(suppressors_lm_K,
|
||||||
|
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Generate plots for interaction scores
|
# Generate plots for interaction scores
|
||||||
# plot_interaction_scores(interaction_scores_reference, out_dir) # Assuming plot_interaction_scores is your plotting function
|
|
||||||
|
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user