|
@@ -181,19 +181,15 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|
|
)
|
|
|
|
|
|
# Create a cleaned version of df that doesn't overlap with summary_stats
|
|
|
- cols_to_keep <- setdiff(names(df), names(summary_stats)[-which(names(summary_stats) %in% group_vars)])
|
|
|
- df_cleaned <- df %>%
|
|
|
- select(all_of(cols_to_keep))
|
|
|
+ cleaned_df <- df %>%
|
|
|
+ select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
|
|
|
+ df_joined <- left_join(cleaned_df, summary_stats, by = group_vars)
|
|
|
|
|
|
- df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
|
|
-
|
|
|
- return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
|
|
+ return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
|
|
}
|
|
|
|
|
|
-calculate_interaction_scores <- function(df, max_conc) {
|
|
|
-
|
|
|
- variables <- c("L", "K", "r", "AUC")
|
|
|
- group_vars <- c("OrfRep", "Gene", "num")
|
|
|
+calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
|
|
|
+ group_vars = c("OrfRep", "Gene", "num")) {
|
|
|
|
|
|
# Calculate total concentration variables
|
|
|
total_conc_num <- length(unique(df$conc_num))
|
|
@@ -214,11 +210,14 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
)
|
|
|
|
|
|
# Calculate per spot
|
|
|
- stats <- calculate_summary_stats(df,
|
|
|
+ # sd and se calculations are invalid when grouping at this level
|
|
|
+ interaction_ss <- calculate_summary_stats(
|
|
|
+ df = df,
|
|
|
variables = variables,
|
|
|
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
|
|
- )$summary_stats
|
|
|
+ )$df_with_stats
|
|
|
|
|
|
+ # Load original WT data
|
|
|
stats <- df %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
mutate(
|
|
@@ -232,6 +231,11 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
WT_sd_AUC = sd_AUC
|
|
|
)
|
|
|
|
|
|
+ cleaned_df <- stats %>%
|
|
|
+ select(-any_of(setdiff(intersect(names(stats), names(interaction_ss)),
|
|
|
+ c(group_vars, "sd_L", "sd_K", "sd_r", "sd_AUC", "se_L", "se_K", "se_r", "se_AUC"))))
|
|
|
+ stats <- left_join(cleaned_df, interaction_ss, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
+
|
|
|
stats <- stats %>%
|
|
|
mutate(
|
|
|
Raw_Shift_L = first(mean_L) - bg_means$L,
|
|
@@ -273,11 +277,11 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
|
|
)
|
|
|
|
|
|
- # Calculate linear models
|
|
|
- lm_L <- lm(Delta_L ~ conc_num, data = stats)
|
|
|
- lm_K <- lm(Delta_K ~ conc_num, data = stats)
|
|
|
- lm_r <- lm(Delta_r ~ conc_num, data = stats)
|
|
|
- lm_AUC <- lm(Delta_AUC ~ conc_num, data = stats)
|
|
|
+ # Fit linear models and calculate interaction scores
|
|
|
+ gene_lm_L <- lm(formula = Delta_L ~ conc_num_factor, data = df)
|
|
|
+ gene_lm_K <- lm(formula = Delta_K ~ conc_num_factor, data = df)
|
|
|
+ gene_lm_r <- lm(formula = Delta_r ~ conc_num_factor, data = df)
|
|
|
+ gene_lm_AUC <- lm(formula = Delta_AUC ~ conc_num_factor, data = df)
|
|
|
|
|
|
interactions <- stats %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
@@ -294,24 +298,15 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
Z_Shift_L = first(Z_Shift_L),
|
|
|
Z_Shift_K = first(Z_Shift_K),
|
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
|
- Z_Shift_AUC = first(Z_Shift_AUC)
|
|
|
- )
|
|
|
-
|
|
|
- # Summarise the data to calculate summary statistics
|
|
|
- summary_stats <- interactions %>%
|
|
|
- summarise(
|
|
|
- Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
|
|
- Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
|
|
- Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
|
|
- Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
|
|
- lm_Score_L = max(conc_num) * coef(lm(Zscore_L ~ conc_num))[2] + coef(lm(Zscore_L ~ conc_num))[1],
|
|
|
- lm_Score_K = max(conc_num) * coef(lm(Zscore_K ~ conc_num))[2] + coef(lm(Zscore_K ~ conc_num))[1],
|
|
|
- lm_Score_r = max(conc_num) * coef(lm(Zscore_r ~ conc_num))[2] + coef(lm(Zscore_r ~ conc_num))[1],
|
|
|
- lm_Score_AUC = max(conc_num) * coef(lm(Zscore_AUC ~ conc_num))[2] + coef(lm(Zscore_AUC ~ conc_num))[1],
|
|
|
- R_Squared_L = summary(lm(Zscore_L ~ conc_num))$r.squared,
|
|
|
- R_Squared_K = summary(lm(Zscore_K ~ conc_num))$r.squared,
|
|
|
- R_Squared_r = summary(lm(Zscore_r ~ conc_num))$r.squared,
|
|
|
- R_Squared_AUC = summary(lm(Zscore_AUC ~ conc_num))$r.squared,
|
|
|
+ Z_Shift_AUC = first(Z_Shift_AUC),
|
|
|
+ lm_Score_L = max_conc * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1],
|
|
|
+ lm_Score_K = max_conc * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1],
|
|
|
+ lm_Score_r = max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1],
|
|
|
+ lm_Score_AUC = max_conc * (gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1],
|
|
|
+ R_Squared_L = summary(gene_lm_L)$r.squared,
|
|
|
+ R_Squared_K = summary(gene_lm_K)$r.squared,
|
|
|
+ R_Squared_r = summary(gene_lm_r)$r.squared,
|
|
|
+ R_Squared_AUC = summary(gene_lm_AUC)$r.squared,
|
|
|
lm_intercept_L = coef(lm(Zscore_L ~ conc_num))[1],
|
|
|
lm_slope_L = coef(lm(Zscore_L ~ conc_num))[2],
|
|
|
lm_intercept_K = coef(lm(Zscore_K ~ conc_num))[1],
|
|
@@ -320,22 +315,15 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
lm_slope_r = coef(lm(Zscore_r ~ conc_num))[2],
|
|
|
lm_intercept_AUC = coef(lm(Zscore_AUC ~ conc_num))[1],
|
|
|
lm_slope_AUC = coef(lm(Zscore_AUC ~ conc_num))[2],
|
|
|
+ Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
|
|
+ Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
|
|
+ Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
|
|
+ Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
|
|
NG = sum(NG, na.rm = TRUE),
|
|
|
DB = sum(DB, na.rm = TRUE),
|
|
|
SM = sum(SM, na.rm = TRUE),
|
|
|
- .groups = "keep"
|
|
|
+ num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1
|
|
|
)
|
|
|
-
|
|
|
- # Join the summary data back to the original data
|
|
|
- cleaned_interactions <- interactions %>%
|
|
|
- select(-any_of(intersect(names(interactions), names(summary_stats))))
|
|
|
- interactions_joined <- left_join(cleaned_interactions, summary_stats, by = group_vars)
|
|
|
- interactions_joined <- interactions_joined %>% distinct()
|
|
|
-
|
|
|
- # Remove duplicate rows if necessary
|
|
|
- interactions <- interactions %>% distinct()
|
|
|
-
|
|
|
- num_non_removed_concs <- total_conc_num - sum(stats$DB, na.rm = TRUE) - 1
|
|
|
|
|
|
interactions <- interactions %>%
|
|
|
mutate(
|
|
@@ -367,13 +355,13 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
|
|
|
"NG", "SM", "DB")
|
|
|
|
|
|
- calculations_joined <- df %>%
|
|
|
- select(-any_of(intersect(names(df), names(calculations))))
|
|
|
- calculations_joined <- left_join(calculations_joined, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
+ cleaned_df <- df %>%
|
|
|
+ select(-any_of(setdiff(intersect(names(df), names(calculations)), group_vars)))
|
|
|
+ calculations_joined <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
- interactions_joined <- df %>%
|
|
|
- select(-any_of(intersect(names(df), names(interactions))))
|
|
|
- interactions_joined <- left_join(interactions_joined, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
+ cleaned_df <- df %>%
|
|
|
+ select(-any_of(setdiff(intersect(names(df), names(interactions)), group_vars)))
|
|
|
+ interactions_joined <- left_join(cleaned_df, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined,
|
|
|
calculations_joined = calculations_joined))
|