More nteraction score data structure improvements
This commit is contained in:
@@ -256,15 +256,15 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
|
|||||||
Zscore_L = Delta_L / WT_sd_L,
|
Zscore_L = Delta_L / WT_sd_L,
|
||||||
Zscore_K = Delta_K / WT_sd_K,
|
Zscore_K = Delta_K / WT_sd_K,
|
||||||
Zscore_r = Delta_r / WT_sd_r,
|
Zscore_r = Delta_r / WT_sd_r,
|
||||||
Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||||
) %>%
|
) %>%
|
||||||
group_modify(~ {
|
group_modify(~ {
|
||||||
|
# Perform linear models
|
||||||
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
|
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
|
||||||
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
|
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
|
||||||
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
|
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
|
||||||
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
|
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
|
||||||
|
|
||||||
# Return coefficients and R-squared values
|
|
||||||
.x %>%
|
.x %>%
|
||||||
mutate(
|
mutate(
|
||||||
lm_intercept_L = coef(lm_L)[1],
|
lm_intercept_L = coef(lm_L)[1],
|
||||||
@@ -290,31 +290,34 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
|
|||||||
}) %>%
|
}) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
# Calculate overall mean and SD for lm_Score_* variables
|
# Continue with the rest of the function as before
|
||||||
lm_means_sds <- calculations %>%
|
lm_means_sds <- calculations %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
summarise(
|
summarise(
|
||||||
L_mean = mean(lm_Score_L, na.rm = TRUE),
|
mean_lm_L = mean(lm_Score_L, na.rm = TRUE),
|
||||||
L_sd = sd(lm_Score_L, na.rm = TRUE),
|
sd_lm_L = sd(lm_Score_L, na.rm = TRUE),
|
||||||
K_mean = mean(lm_Score_K, na.rm = TRUE),
|
mean_lm_K = mean(lm_Score_K, na.rm = TRUE),
|
||||||
K_sd = sd(lm_Score_K, na.rm = TRUE),
|
sd_lm_K = sd(lm_Score_K, na.rm = TRUE),
|
||||||
r_mean = mean(lm_Score_r, na.rm = TRUE),
|
mean_lm_r = mean(lm_Score_r, na.rm = TRUE),
|
||||||
r_sd = sd(lm_Score_r, na.rm = TRUE),
|
sd_lm_r = sd(lm_Score_r, na.rm = TRUE),
|
||||||
AUC_mean = mean(lm_Score_AUC, na.rm = TRUE),
|
mean_lm_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
||||||
AUC_sd = sd(lm_Score_AUC, na.rm = TRUE)
|
sd_lm_AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate gene Z-scores
|
# Continue with gene Z-scores and interactions
|
||||||
calculations <- calculations %>%
|
calculations <- calculations %>%
|
||||||
|
left_join(lm_means_sds, by = group_vars) %>%
|
||||||
|
group_by(across(all_of(group_vars))) %>%
|
||||||
mutate(
|
mutate(
|
||||||
Z_lm_L = (lm_Score_L - lm_means_sds$L_mean) / lm_means_sds$L_sd,
|
Z_lm_L = (lm_Score_L - mean_lm_L) / sd_lm_L,
|
||||||
Z_lm_K = (lm_Score_K - lm_means_sds$K_mean) / lm_means_sds$K_sd,
|
Z_lm_K = (lm_Score_K - mean_lm_K) / sd_lm_K,
|
||||||
Z_lm_r = (lm_Score_r - lm_means_sds$r_mean) / lm_means_sds$r_sd,
|
Z_lm_r = (lm_Score_r - mean_lm_r) / sd_lm_r,
|
||||||
Z_lm_AUC = (lm_Score_AUC - lm_means_sds$AUC_mean) / lm_means_sds$AUC_sd
|
Z_lm_AUC = (lm_Score_AUC - mean_lm_AUC) / sd_lm_AUC
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build summary stats (interactions)
|
# Build summary stats (interactions)
|
||||||
interactions <- calculations %>%
|
interactions <- calculations %>%
|
||||||
|
group_by(across(all_of(group_vars))) %>%
|
||||||
summarise(
|
summarise(
|
||||||
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
||||||
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
||||||
@@ -359,54 +362,14 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fit correlation models between Z_lm_* and Avg_Zscore_* (same-variable)
|
# Return full data and correlation stats
|
||||||
correlation_lms_same <- list(
|
|
||||||
L = lm(Z_lm_L ~ Avg_Zscore_L, data = interactions),
|
|
||||||
K = lm(Z_lm_K ~ Avg_Zscore_K, data = interactions),
|
|
||||||
r = lm(Z_lm_r ~ Avg_Zscore_r, data = interactions),
|
|
||||||
AUC = lm(Z_lm_AUC ~ Avg_Zscore_AUC, data = interactions)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract correlation statistics for same-variable correlations
|
|
||||||
correlation_stats_same <- map(correlation_lms_same, ~ {
|
|
||||||
list(
|
|
||||||
intercept = coef(.x)[1],
|
|
||||||
slope = coef(.x)[2],
|
|
||||||
r_squared = summary(.x)$r.squared
|
|
||||||
)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Fit additional correlation models between different Z_lm_* variables
|
|
||||||
correlation_lms_diff <- list(
|
|
||||||
L_vs_K = lm(Z_lm_K ~ Z_lm_L, data = interactions),
|
|
||||||
L_vs_r = lm(Z_lm_r ~ Z_lm_L, data = interactions),
|
|
||||||
L_vs_AUC = lm(Z_lm_AUC ~ Z_lm_L, data = interactions),
|
|
||||||
K_vs_r = lm(Z_lm_r ~ Z_lm_K, data = interactions),
|
|
||||||
K_vs_AUC = lm(Z_lm_AUC ~ Z_lm_K, data = interactions),
|
|
||||||
r_vs_AUC = lm(Z_lm_AUC ~ Z_lm_r, data = interactions)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract correlation statistics for different-variable correlations
|
|
||||||
correlation_stats_diff <- map(correlation_lms_diff, ~ {
|
|
||||||
list(
|
|
||||||
intercept = coef(.x)[1],
|
|
||||||
slope = coef(.x)[2],
|
|
||||||
r_squared = summary(.x)$r.squared
|
|
||||||
)
|
|
||||||
})
|
|
||||||
|
|
||||||
# Combine all correlation stats
|
|
||||||
correlation_stats <- c(correlation_stats_same, correlation_stats_diff)
|
|
||||||
|
|
||||||
# Prepare full_data by merging interactions back into calculations
|
|
||||||
full_data <- calculations %>%
|
full_data <- calculations %>%
|
||||||
left_join(interactions, by = group_vars)
|
left_join(interactions, by = group_vars)
|
||||||
|
|
||||||
return(list(
|
return(list(
|
||||||
calculations = calculations,
|
calculations = calculations,
|
||||||
interactions = interactions,
|
interactions = interactions,
|
||||||
full_data = full_data,
|
full_data = full_data
|
||||||
correlation_stats = correlation_stats
|
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -709,7 +672,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
if (type == "reference") {
|
if (type == "reference") {
|
||||||
group_vars <- c("OrfRep", "Gene", "num")
|
group_vars <- c("OrfRep", "Gene", "num")
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
mutate(OrfRepCombined = do.call(paste, c(across(all_of(group_vars)), sep = "_")))
|
mutate(OrfRepCombined = paste(!!!syms(group_vars), sep = "_"))
|
||||||
} else if (type == "deletion") {
|
} else if (type == "deletion") {
|
||||||
group_vars <- c("OrfRep", "Gene")
|
group_vars <- c("OrfRep", "Gene")
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
@@ -1251,9 +1214,9 @@ main <- function() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Generating quality control plots in parallel
|
# Generating quality control plots in parallel
|
||||||
furrr::future_map(plot_configs, function(config) {
|
# furrr::future_map(plot_configs, function(config) {
|
||||||
generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
||||||
}, .options = furrr_options(seed = TRUE))
|
# }, .options = furrr_options(seed = TRUE))
|
||||||
|
|
||||||
# Process background strains
|
# Process background strains
|
||||||
bg_strains <- c("YDL227C")
|
bg_strains <- c("YDL227C")
|
||||||
@@ -1311,7 +1274,7 @@ main <- function() {
|
|||||||
df_reference_stats <- calculate_summary_stats(
|
df_reference_stats <- calculate_summary_stats(
|
||||||
df = df_reference,
|
df = df_reference,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
||||||
zscore_calculations_reference <- reference_results$calculations
|
zscore_calculations_reference <- reference_results$calculations
|
||||||
@@ -1322,7 +1285,7 @@ main <- function() {
|
|||||||
df_deletion_stats <- calculate_summary_stats(
|
df_deletion_stats <- calculate_summary_stats(
|
||||||
df = df_deletion,
|
df = df_deletion,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
|
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
|
||||||
zscore_calculations <- deletion_results$calculations
|
zscore_calculations <- deletion_results$calculations
|
||||||
|
|||||||
Reference in New Issue
Block a user