More interaction df changes

This commit is contained in:
2024-09-18 20:08:24 -04:00
parent 1b7e5f6e5d
commit 62e07f53fd

View File

@@ -181,19 +181,15 @@ calculate_summary_stats <- function(df, variables, group_vars) {
) )
# Create a cleaned version of df that doesn't overlap with summary_stats # Create a cleaned version of df that doesn't overlap with summary_stats
cols_to_keep <- setdiff(names(df), names(summary_stats)[-which(names(summary_stats) %in% group_vars)]) cleaned_df <- df %>%
df_cleaned <- df %>% select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
select(all_of(cols_to_keep)) df_joined <- left_join(cleaned_df, summary_stats, by = group_vars)
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars) return(list(summary_stats = summary_stats, df_with_stats = df_joined))
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
} }
calculate_interaction_scores <- function(df, max_conc) { calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num")) {
variables <- c("L", "K", "r", "AUC")
group_vars <- c("OrfRep", "Gene", "num")
# Calculate total concentration variables # Calculate total concentration variables
total_conc_num <- length(unique(df$conc_num)) total_conc_num <- length(unique(df$conc_num))
@@ -214,11 +210,14 @@ calculate_interaction_scores <- function(df, max_conc) {
) )
# Calculate per spot # Calculate per spot
stats <- calculate_summary_stats(df, # sd and se calculations are invalid when grouping at this level
interaction_ss <- calculate_summary_stats(
df = df,
variables = variables, variables = variables,
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor") group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
)$summary_stats )$df_with_stats
# Load original WT data
stats <- df %>% stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
mutate( mutate(
@@ -232,6 +231,11 @@ calculate_interaction_scores <- function(df, max_conc) {
WT_sd_AUC = sd_AUC WT_sd_AUC = sd_AUC
) )
cleaned_df <- stats %>%
select(-any_of(setdiff(intersect(names(stats), names(interaction_ss)),
c(group_vars, "sd_L", "sd_K", "sd_r", "sd_AUC", "se_L", "se_K", "se_r", "se_AUC"))))
stats <- left_join(cleaned_df, interaction_ss, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
stats <- stats %>% stats <- stats %>%
mutate( mutate(
Raw_Shift_L = first(mean_L) - bg_means$L, Raw_Shift_L = first(mean_L) - bg_means$L,
@@ -273,11 +277,11 @@ calculate_interaction_scores <- function(df, max_conc) {
Zscore_AUC = Delta_AUC / WT_sd_AUC Zscore_AUC = Delta_AUC / WT_sd_AUC
) )
# Calculate linear models # Fit linear models and calculate interaction scores
lm_L <- lm(Delta_L ~ conc_num, data = stats) gene_lm_L <- lm(formula = Delta_L ~ conc_num_factor, data = df)
lm_K <- lm(Delta_K ~ conc_num, data = stats) gene_lm_K <- lm(formula = Delta_K ~ conc_num_factor, data = df)
lm_r <- lm(Delta_r ~ conc_num, data = stats) gene_lm_r <- lm(formula = Delta_r ~ conc_num_factor, data = df)
lm_AUC <- lm(Delta_AUC ~ conc_num, data = stats) gene_lm_AUC <- lm(formula = Delta_AUC ~ conc_num_factor, data = df)
interactions <- stats %>% interactions <- stats %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
@@ -294,24 +298,15 @@ calculate_interaction_scores <- function(df, max_conc) {
Z_Shift_L = first(Z_Shift_L), Z_Shift_L = first(Z_Shift_L),
Z_Shift_K = first(Z_Shift_K), Z_Shift_K = first(Z_Shift_K),
Z_Shift_r = first(Z_Shift_r), Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC) Z_Shift_AUC = first(Z_Shift_AUC),
) lm_Score_L = max_conc * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1],
lm_Score_K = max_conc * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1],
# Summarise the data to calculate summary statistics lm_Score_r = max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1],
summary_stats <- interactions %>% lm_Score_AUC = max_conc * (gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1],
summarise( R_Squared_L = summary(gene_lm_L)$r.squared,
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), R_Squared_K = summary(gene_lm_K)$r.squared,
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), R_Squared_r = summary(gene_lm_r)$r.squared,
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), R_Squared_AUC = summary(gene_lm_AUC)$r.squared,
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
lm_Score_L = max(conc_num) * coef(lm(Zscore_L ~ conc_num))[2] + coef(lm(Zscore_L ~ conc_num))[1],
lm_Score_K = max(conc_num) * coef(lm(Zscore_K ~ conc_num))[2] + coef(lm(Zscore_K ~ conc_num))[1],
lm_Score_r = max(conc_num) * coef(lm(Zscore_r ~ conc_num))[2] + coef(lm(Zscore_r ~ conc_num))[1],
lm_Score_AUC = max(conc_num) * coef(lm(Zscore_AUC ~ conc_num))[2] + coef(lm(Zscore_AUC ~ conc_num))[1],
R_Squared_L = summary(lm(Zscore_L ~ conc_num))$r.squared,
R_Squared_K = summary(lm(Zscore_K ~ conc_num))$r.squared,
R_Squared_r = summary(lm(Zscore_r ~ conc_num))$r.squared,
R_Squared_AUC = summary(lm(Zscore_AUC ~ conc_num))$r.squared,
lm_intercept_L = coef(lm(Zscore_L ~ conc_num))[1], lm_intercept_L = coef(lm(Zscore_L ~ conc_num))[1],
lm_slope_L = coef(lm(Zscore_L ~ conc_num))[2], lm_slope_L = coef(lm(Zscore_L ~ conc_num))[2],
lm_intercept_K = coef(lm(Zscore_K ~ conc_num))[1], lm_intercept_K = coef(lm(Zscore_K ~ conc_num))[1],
@@ -320,23 +315,16 @@ calculate_interaction_scores <- function(df, max_conc) {
lm_slope_r = coef(lm(Zscore_r ~ conc_num))[2], lm_slope_r = coef(lm(Zscore_r ~ conc_num))[2],
lm_intercept_AUC = coef(lm(Zscore_AUC ~ conc_num))[1], lm_intercept_AUC = coef(lm(Zscore_AUC ~ conc_num))[1],
lm_slope_AUC = coef(lm(Zscore_AUC ~ conc_num))[2], lm_slope_AUC = coef(lm(Zscore_AUC ~ conc_num))[2],
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
NG = sum(NG, na.rm = TRUE), NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE), DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE), SM = sum(SM, na.rm = TRUE),
.groups = "keep" num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1
) )
# Join the summary data back to the original data
cleaned_interactions <- interactions %>%
select(-any_of(intersect(names(interactions), names(summary_stats))))
interactions_joined <- left_join(cleaned_interactions, summary_stats, by = group_vars)
interactions_joined <- interactions_joined %>% distinct()
# Remove duplicate rows if necessary
interactions <- interactions %>% distinct()
num_non_removed_concs <- total_conc_num - sum(stats$DB, na.rm = TRUE) - 1
interactions <- interactions %>% interactions <- interactions %>%
mutate( mutate(
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
@@ -367,13 +355,13 @@ calculate_interaction_scores <- function(df, max_conc) {
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
"NG", "SM", "DB") "NG", "SM", "DB")
calculations_joined <- df %>% cleaned_df <- df %>%
select(-any_of(intersect(names(df), names(calculations)))) select(-any_of(setdiff(intersect(names(df), names(calculations)), group_vars)))
calculations_joined <- left_join(calculations_joined, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")) calculations_joined <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
interactions_joined <- df %>% cleaned_df <- df %>%
select(-any_of(intersect(names(df), names(interactions)))) select(-any_of(setdiff(intersect(names(df), names(interactions)), group_vars)))
interactions_joined <- left_join(interactions_joined, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")) interactions_joined <- left_join(cleaned_df, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined, return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined,
calculations_joined = calculations_joined)) calculations_joined = calculations_joined))