|
@@ -184,7 +184,6 @@ update_gene_names <- function(df, sgd_gene_list) {
|
|
}
|
|
}
|
|
|
|
|
|
calculate_summary_stats <- function(df, variables, group_vars) {
|
|
calculate_summary_stats <- function(df, variables, group_vars) {
|
|
-
|
|
|
|
summary_stats <- df %>%
|
|
summary_stats <- df %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
summarise(
|
|
summarise(
|
|
@@ -212,12 +211,12 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
|
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
|
}
|
|
}
|
|
|
|
|
|
-calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|
|
|
- group_vars = c("OrfRep", "Gene", "num")) {
|
|
|
|
-
|
|
|
|
|
|
+calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) {
|
|
|
|
+
|
|
# Calculate total concentration variables
|
|
# Calculate total concentration variables
|
|
total_conc_num <- length(unique(df$conc_num))
|
|
total_conc_num <- length(unique(df$conc_num))
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # Initial calculations
|
|
calculations <- df %>%
|
|
calculations <- df %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
mutate(
|
|
mutate(
|
|
@@ -225,20 +224,24 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|
DB = sum(DB, na.rm = TRUE),
|
|
DB = sum(DB, na.rm = TRUE),
|
|
SM = sum(SM, na.rm = TRUE),
|
|
SM = sum(SM, na.rm = TRUE),
|
|
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
|
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
|
-
|
|
|
|
|
|
+
|
|
# Calculate raw data
|
|
# Calculate raw data
|
|
Raw_Shift_L = first(mean_L) - bg_stats$mean_L,
|
|
Raw_Shift_L = first(mean_L) - bg_stats$mean_L,
|
|
Raw_Shift_K = first(mean_K) - bg_stats$mean_K,
|
|
Raw_Shift_K = first(mean_K) - bg_stats$mean_K,
|
|
Raw_Shift_r = first(mean_r) - bg_stats$mean_r,
|
|
Raw_Shift_r = first(mean_r) - bg_stats$mean_r,
|
|
Raw_Shift_AUC = first(mean_AUC) - bg_stats$mean_AUC,
|
|
Raw_Shift_AUC = first(mean_AUC) - bg_stats$mean_AUC,
|
|
- Z_Shift_L = first(Raw_Shift_L) / bg_stats$sd_L,
|
|
|
|
- Z_Shift_K = first(Raw_Shift_K) / bg_stats$sd_K,
|
|
|
|
- Z_Shift_r = first(Raw_Shift_r) / bg_stats$sd_r,
|
|
|
|
- Z_Shift_AUC = first(Raw_Shift_AUC) / bg_stats$sd_AUC,
|
|
|
|
|
|
+ Z_Shift_L = Raw_Shift_L / bg_stats$sd_L,
|
|
|
|
+ Z_Shift_K = Raw_Shift_K / bg_stats$sd_K,
|
|
|
|
+ Z_Shift_r = Raw_Shift_r / bg_stats$sd_r,
|
|
|
|
+ Z_Shift_AUC = Raw_Shift_AUC / bg_stats$sd_AUC,
|
|
|
|
+
|
|
|
|
+ # Expected values
|
|
Exp_L = WT_L + Raw_Shift_L,
|
|
Exp_L = WT_L + Raw_Shift_L,
|
|
Exp_K = WT_K + Raw_Shift_K,
|
|
Exp_K = WT_K + Raw_Shift_K,
|
|
Exp_r = WT_r + Raw_Shift_r,
|
|
Exp_r = WT_r + Raw_Shift_r,
|
|
Exp_AUC = WT_AUC + Raw_Shift_AUC,
|
|
Exp_AUC = WT_AUC + Raw_Shift_AUC,
|
|
|
|
+
|
|
|
|
+ # Deltas
|
|
Delta_L = mean_L - Exp_L,
|
|
Delta_L = mean_L - Exp_L,
|
|
Delta_K = mean_K - Exp_K,
|
|
Delta_K = mean_K - Exp_K,
|
|
Delta_r = mean_r - Exp_r,
|
|
Delta_r = mean_r - Exp_r,
|
|
@@ -248,19 +251,24 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
|
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
|
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
|
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
|
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
|
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
|
-
|
|
|
|
|
|
+
|
|
# Calculate Z-scores
|
|
# Calculate Z-scores
|
|
Zscore_L = Delta_L / WT_sd_L,
|
|
Zscore_L = Delta_L / WT_sd_L,
|
|
Zscore_K = Delta_K / WT_sd_K,
|
|
Zscore_K = Delta_K / WT_sd_K,
|
|
Zscore_r = Delta_r / WT_sd_r,
|
|
Zscore_r = Delta_r / WT_sd_r,
|
|
- Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
|
|
|
-
|
|
|
|
- # Fit linear models and store in list-columns
|
|
|
|
- gene_lm_L = list(lm(Delta_L ~ conc_num_factor, data = pick(everything()))),
|
|
|
|
- gene_lm_K = list(lm(Delta_K ~ conc_num_factor, data = pick(everything()))),
|
|
|
|
- gene_lm_r = list(lm(Delta_r ~ conc_num_factor, data = pick(everything()))),
|
|
|
|
- gene_lm_AUC = list(lm(Delta_AUC ~ conc_num_factor, data = pick(everything()))),
|
|
|
|
-
|
|
|
|
|
|
+ Zscore_AUC = Delta_AUC / WT_sd_AUC
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Fit linear models per group
|
|
|
|
+ lm_results <- calculations %>%
|
|
|
|
+ nest() %>%
|
|
|
|
+ mutate(
|
|
|
|
+ # Fit linear models
|
|
|
|
+ gene_lm_L = map(data, ~ lm(Delta_L ~ conc_num_factor, data = .x)),
|
|
|
|
+ gene_lm_K = map(data, ~ lm(Delta_K ~ conc_num_factor, data = .x)),
|
|
|
|
+ gene_lm_r = map(data, ~ lm(Delta_r ~ conc_num_factor, data = .x)),
|
|
|
|
+ gene_lm_AUC = map(data, ~ lm(Delta_AUC ~ conc_num_factor, data = .x)),
|
|
|
|
+
|
|
# Extract coefficients using purrr::map_dbl
|
|
# Extract coefficients using purrr::map_dbl
|
|
lm_intercept_L = map_dbl(gene_lm_L, ~ coef(.x)[1]),
|
|
lm_intercept_L = map_dbl(gene_lm_L, ~ coef(.x)[1]),
|
|
lm_slope_L = map_dbl(gene_lm_L, ~ coef(.x)[2]),
|
|
lm_slope_L = map_dbl(gene_lm_L, ~ coef(.x)[2]),
|
|
@@ -270,129 +278,152 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|
lm_slope_r = map_dbl(gene_lm_r, ~ coef(.x)[2]),
|
|
lm_slope_r = map_dbl(gene_lm_r, ~ coef(.x)[2]),
|
|
lm_intercept_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[1]),
|
|
lm_intercept_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[1]),
|
|
lm_slope_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[2]),
|
|
lm_slope_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[2]),
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # Calculate R-squared values for Delta_ models
|
|
|
|
+ R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared),
|
|
|
|
+ R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared),
|
|
|
|
+ R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared),
|
|
|
|
+ R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared),
|
|
|
|
+
|
|
# Calculate lm_Score_* based on coefficients
|
|
# Calculate lm_Score_* based on coefficients
|
|
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
|
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
|
lm_Score_K = max_conc * lm_slope_K + lm_intercept_K,
|
|
lm_Score_K = max_conc * lm_slope_K + lm_intercept_K,
|
|
lm_Score_r = max_conc * lm_slope_r + lm_intercept_r,
|
|
lm_Score_r = max_conc * lm_slope_r + lm_intercept_r,
|
|
- lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC,
|
|
|
|
-
|
|
|
|
- # Calculate R-squared values
|
|
|
|
- R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared),
|
|
|
|
- R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared),
|
|
|
|
- R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared),
|
|
|
|
- R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared)
|
|
|
|
|
|
+ lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
|
|
) %>%
|
|
) %>%
|
|
|
|
+ select(-data, -starts_with("gene_lm_")) %>%
|
|
ungroup()
|
|
ungroup()
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # Merge lm_results back into calculations
|
|
|
|
+ calculations <- calculations %>%
|
|
|
|
+ left_join(lm_results, by = group_vars)
|
|
|
|
+
|
|
# Calculate overall mean and SD for lm_Score_* variables
|
|
# Calculate overall mean and SD for lm_Score_* variables
|
|
- lm_means_sds <- calculations %>%
|
|
|
|
|
|
+ gene_lm_means <- lm_results %>%
|
|
summarise(
|
|
summarise(
|
|
- lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
|
|
|
|
- lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
|
|
|
|
- lm_mean_K = mean(lm_Score_K, na.rm = TRUE),
|
|
|
|
- lm_sd_K = sd(lm_Score_K, na.rm = TRUE),
|
|
|
|
- lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
|
|
|
|
- lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
|
|
|
|
- lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
|
|
|
- lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
|
|
|
|
|
+ L = mean(lm_Score_L, na.rm = TRUE),
|
|
|
|
+ K = mean(lm_Score_K, na.rm = TRUE),
|
|
|
|
+ r = mean(lm_Score_r, na.rm = TRUE),
|
|
|
|
+ AUC = mean(lm_Score_AUC, na.rm = TRUE)
|
|
)
|
|
)
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ gene_lm_sds <- lm_results %>%
|
|
|
|
+ summarise(
|
|
|
|
+ L = sd(lm_Score_L, na.rm = TRUE),
|
|
|
|
+ K = sd(lm_Score_K, na.rm = TRUE),
|
|
|
|
+ r = sd(lm_Score_r, na.rm = TRUE),
|
|
|
|
+ AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Calculate gene Z-scores
|
|
calculations <- calculations %>%
|
|
calculations <- calculations %>%
|
|
mutate(
|
|
mutate(
|
|
- Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L,
|
|
|
|
- Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K,
|
|
|
|
- Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r,
|
|
|
|
- Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC
|
|
|
|
|
|
+ Z_lm_L = (lm_Score_L - gene_lm_means$L) / gene_lm_sds$L,
|
|
|
|
+ Z_lm_K = (lm_Score_K - gene_lm_means$K) / gene_lm_sds$K,
|
|
|
|
+ Z_lm_r = (lm_Score_r - gene_lm_means$r) / gene_lm_sds$r,
|
|
|
|
+ Z_lm_AUC = (lm_Score_AUC - gene_lm_means$AUC) / gene_lm_sds$AUC
|
|
)
|
|
)
|
|
-
|
|
|
|
- # Summarize some of the stats
|
|
|
|
|
|
+
|
|
|
|
+ # Build summary stats (interactions)
|
|
interactions <- calculations %>%
|
|
interactions <- calculations %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
- mutate(
|
|
|
|
- # Calculate raw shifts
|
|
|
|
|
|
+ summarise(
|
|
|
|
+ # Calculate average Z-scores
|
|
|
|
+ Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
|
+ Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
|
+ Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
|
+ Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(num_non_removed_concs),
|
|
|
|
+
|
|
|
|
+ # Interaction Z-scores
|
|
|
|
+ Z_lm_L = first(Z_lm_L),
|
|
|
|
+ Z_lm_K = first(Z_lm_K),
|
|
|
|
+ Z_lm_r = first(Z_lm_r),
|
|
|
|
+ Z_lm_AUC = first(Z_lm_AUC),
|
|
|
|
+
|
|
|
|
+ # Raw Shifts
|
|
Raw_Shift_L = first(Raw_Shift_L),
|
|
Raw_Shift_L = first(Raw_Shift_L),
|
|
Raw_Shift_K = first(Raw_Shift_K),
|
|
Raw_Shift_K = first(Raw_Shift_K),
|
|
Raw_Shift_r = first(Raw_Shift_r),
|
|
Raw_Shift_r = first(Raw_Shift_r),
|
|
Raw_Shift_AUC = first(Raw_Shift_AUC),
|
|
Raw_Shift_AUC = first(Raw_Shift_AUC),
|
|
-
|
|
|
|
- # Calculate Z-shifts
|
|
|
|
|
|
+
|
|
|
|
+ # Z Shifts
|
|
Z_Shift_L = first(Z_Shift_L),
|
|
Z_Shift_L = first(Z_Shift_L),
|
|
Z_Shift_K = first(Z_Shift_K),
|
|
Z_Shift_K = first(Z_Shift_K),
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
|
-
|
|
|
|
- # Sum Z-scores
|
|
|
|
- Sum_Z_Score_L = sum(Zscore_L),
|
|
|
|
- Sum_Z_Score_K = sum(Zscore_K),
|
|
|
|
- Sum_Z_Score_r = sum(Zscore_r),
|
|
|
|
- Sum_Z_Score_AUC = sum(Zscore_AUC),
|
|
|
|
-
|
|
|
|
- # Calculate Average Z-scores
|
|
|
|
- Avg_Zscore_L = Sum_Z_Score_L / num_non_removed_concs,
|
|
|
|
- Avg_Zscore_K = Sum_Z_Score_K / num_non_removed_concs,
|
|
|
|
- Avg_Zscore_r = Sum_Z_Score_r / (total_conc_num - 1),
|
|
|
|
- Avg_Zscore_AUC = Sum_Z_Score_AUC / (total_conc_num - 1)
|
|
|
|
|
|
+
|
|
|
|
+ # Include NG, DB, SM
|
|
|
|
+ NG = first(NG),
|
|
|
|
+ DB = first(DB),
|
|
|
|
+ SM = first(SM)
|
|
) %>%
|
|
) %>%
|
|
arrange(desc(Z_lm_L), desc(NG)) %>%
|
|
arrange(desc(Z_lm_L), desc(NG)) %>%
|
|
ungroup()
|
|
ungroup()
|
|
-
|
|
|
|
- # Declare column order for output
|
|
|
|
- calculations <- calculations %>%
|
|
|
|
- select(
|
|
|
|
- "OrfRep", "Gene", "num", "N",
|
|
|
|
- "conc_num", "conc_num_factor", "conc_num_factor_factor",
|
|
|
|
- "mean_L", "mean_K", "mean_r", "mean_AUC",
|
|
|
|
- "median_L", "median_K", "median_r", "median_AUC",
|
|
|
|
- "sd_L", "sd_K", "sd_r", "sd_AUC",
|
|
|
|
- "se_L", "se_K", "se_r", "se_AUC",
|
|
|
|
- "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
|
|
|
- "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
|
|
|
- "WT_L", "WT_K", "WT_r", "WT_AUC",
|
|
|
|
- "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
|
|
|
|
- "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
|
|
|
|
- "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
|
|
|
|
- "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
|
|
|
|
- "NG", "SM", "DB"
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # Calculate overlap
|
|
interactions <- interactions %>%
|
|
interactions <- interactions %>%
|
|
- select(
|
|
|
|
- "OrfRep", "Gene", "num", "NG", "DB", "SM",
|
|
|
|
- "conc_num", "conc_num_factor", "conc_num_factor_factor",
|
|
|
|
- "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
|
|
|
- "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
|
|
|
- "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
|
|
|
- "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
|
|
|
|
- "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
|
|
|
|
- "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
|
|
|
- "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
|
|
|
|
|
|
+ mutate(
|
|
|
|
+ Overlap = case_when(
|
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
|
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only",
|
|
|
|
+ Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
|
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only",
|
|
|
|
+ Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
|
|
|
|
+ Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
|
|
|
|
+ Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
|
|
|
+ TRUE ~ "No Effect"
|
|
|
|
+ )
|
|
)
|
|
)
|
|
-
|
|
|
|
- # Clean the original dataframe by removing overlapping columns
|
|
|
|
- cleaned_df <- df %>%
|
|
|
|
- select(-any_of(
|
|
|
|
- setdiff(intersect(names(df), names(calculations)),
|
|
|
|
- c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))
|
|
|
|
-
|
|
|
|
- # Join the original dataframe with calculations
|
|
|
|
- df_with_calculations <- left_join(cleaned_df, calculations,
|
|
|
|
- by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
|
|
|
-
|
|
|
|
- # Remove overlapping columns between df_with_calculations and interactions
|
|
|
|
- df_with_calculations_clean <- df_with_calculations %>%
|
|
|
|
- select(-any_of(
|
|
|
|
- setdiff(intersect(names(df_with_calculations), names(interactions)),
|
|
|
|
- c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))
|
|
|
|
-
|
|
|
|
- # Join with interactions to create the full dataset
|
|
|
|
- full_data <- left_join(df_with_calculations_clean, interactions,
|
|
|
|
- by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ # Fit correlation models between Z_lm_* and Avg_Zscore_* (same-variable)
|
|
|
|
+ correlation_lms_same <- list(
|
|
|
|
+ L = lm(Z_lm_L ~ Avg_Zscore_L, data = interactions),
|
|
|
|
+ K = lm(Z_lm_K ~ Avg_Zscore_K, data = interactions),
|
|
|
|
+ r = lm(Z_lm_r ~ Avg_Zscore_r, data = interactions),
|
|
|
|
+ AUC = lm(Z_lm_AUC ~ Avg_Zscore_AUC, data = interactions)
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Extract correlation statistics for same-variable correlations
|
|
|
|
+ correlation_stats_same <- map(correlation_lms_same, ~ {
|
|
|
|
+ list(
|
|
|
|
+ intercept = coef(.x)[1],
|
|
|
|
+ slope = coef(.x)[2],
|
|
|
|
+ r_squared = summary(.x)$r.squared
|
|
|
|
+ )
|
|
|
|
+ })
|
|
|
|
+
|
|
|
|
+ # Fit additional correlation models between different Z_lm_* variables
|
|
|
|
+ correlation_lms_diff <- list(
|
|
|
|
+ L_vs_K = lm(Z_lm_K ~ Z_lm_L, data = interactions),
|
|
|
|
+ L_vs_r = lm(Z_lm_r ~ Z_lm_L, data = interactions),
|
|
|
|
+ L_vs_AUC = lm(Z_lm_AUC ~ Z_lm_L, data = interactions),
|
|
|
|
+ K_vs_r = lm(Z_lm_r ~ Z_lm_K, data = interactions),
|
|
|
|
+ K_vs_AUC = lm(Z_lm_AUC ~ Z_lm_K, data = interactions),
|
|
|
|
+ r_vs_AUC = lm(Z_lm_AUC ~ Z_lm_r, data = interactions)
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Extract correlation statistics for different-variable correlations
|
|
|
|
+ correlation_stats_diff <- map(correlation_lms_diff, ~ {
|
|
|
|
+ list(
|
|
|
|
+ intercept = coef(.x)[1],
|
|
|
|
+ slope = coef(.x)[2],
|
|
|
|
+ r_squared = summary(.x)$r.squared
|
|
|
|
+ )
|
|
|
|
+ })
|
|
|
|
+
|
|
|
|
+ # Combine all correlation stats
|
|
|
|
+ correlation_stats <- c(correlation_stats_same, correlation_stats_diff)
|
|
|
|
+
|
|
|
|
+ # Prepare full_data by merging interactions back into calculations
|
|
|
|
+ full_data <- calculations %>%
|
|
|
|
+ left_join(interactions, by = group_vars)
|
|
|
|
+
|
|
return(list(
|
|
return(list(
|
|
calculations = calculations,
|
|
calculations = calculations,
|
|
interactions = interactions,
|
|
interactions = interactions,
|
|
- full_data = full_data
|
|
|
|
|
|
+ full_data = full_data,
|
|
|
|
+ correlation_stats = correlation_stats
|
|
))
|
|
))
|
|
}
|
|
}
|
|
|
|
|
|
@@ -577,32 +608,11 @@ generate_scatter_plot <- function(plot, config) {
|
|
# Add error bars if specified
|
|
# Add error bars if specified
|
|
if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
|
|
if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
|
|
if (!is.null(config$error_bar_params)) {
|
|
if (!is.null(config$error_bar_params)) {
|
|
- # Error bar params are constants, so set them outside aes
|
|
|
|
- plot <- plot +
|
|
|
|
- geom_errorbar(
|
|
|
|
- aes(
|
|
|
|
- ymin = !!sym(config$y_var), # y_var mapped to y-axis
|
|
|
|
- ymax = !!sym(config$y_var)
|
|
|
|
- ),
|
|
|
|
- ymin = config$error_bar_params$ymin, # Constant values
|
|
|
|
- ymax = config$error_bar_params$ymax, # Constant values
|
|
|
|
- alpha = 0.3,
|
|
|
|
- linewidth = 0.5
|
|
|
|
- )
|
|
|
|
|
|
+ plot <- plot + geom_errorbar(aes(ymin = config$error_bar_params$ymin, ymax = config$error_bar_params$ymax))
|
|
} else {
|
|
} else {
|
|
- # Dynamically generate ymin and ymax based on column names
|
|
|
|
y_mean_col <- paste0("mean_", config$y_var)
|
|
y_mean_col <- paste0("mean_", config$y_var)
|
|
y_sd_col <- paste0("sd_", config$y_var)
|
|
y_sd_col <- paste0("sd_", config$y_var)
|
|
-
|
|
|
|
- plot <- plot +
|
|
|
|
- geom_errorbar(
|
|
|
|
- aes(
|
|
|
|
- ymin = !!sym(y_mean_col) - !!sym(y_sd_col), # Calculating ymin in aes
|
|
|
|
- ymax = !!sym(y_mean_col) + !!sym(y_sd_col) # Calculating ymax in aes
|
|
|
|
- ),
|
|
|
|
- alpha = 0.3,
|
|
|
|
- linewidth = 0.5
|
|
|
|
- )
|
|
|
|
|
|
+ plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), ymax = !!sym(y_mean_col) + !!sym(y_sd_col)))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -711,7 +721,18 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
|
|
return(list(plots = plot_configs))
|
|
return(list(plots = plot_configs))
|
|
}
|
|
}
|
|
|
|
|
|
-generate_interaction_plot_configs <- function(df, plot_type = "reference") {
|
|
|
|
|
|
+generate_interaction_plot_configs <- function(df, type) {
|
|
|
|
+
|
|
|
|
+ if (type == "reference") {
|
|
|
|
+ group_vars <- c("OrfRep", "Gene", "num")
|
|
|
|
+ df <- df %>%
|
|
|
|
+ mutate(OrfRepCombined = do.call(paste, c(across(all_of(group_vars)), sep = "_")))
|
|
|
|
+ } else if (type == "deletion") {
|
|
|
|
+ group_vars <- c("OrfRep", "Gene")
|
|
|
|
+ df <- df %>%
|
|
|
|
+ mutate(OrfRepCombined = OrfRep)
|
|
|
|
+ }
|
|
|
|
+
|
|
limits_map <- list(
|
|
limits_map <- list(
|
|
L = c(0, 130),
|
|
L = c(0, 130),
|
|
K = c(-20, 160),
|
|
K = c(-20, 160),
|
|
@@ -720,47 +741,50 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") {
|
|
)
|
|
)
|
|
|
|
|
|
delta_limits_map <- list(
|
|
delta_limits_map <- list(
|
|
- Delta_L = c(-60, 60),
|
|
|
|
- Delta_K = c(-60, 60),
|
|
|
|
- Delta_r = c(-0.6, 0.6),
|
|
|
|
- Delta_AUC = c(-6000, 6000)
|
|
|
|
|
|
+ L = c(-60, 60),
|
|
|
|
+ K = c(-60, 60),
|
|
|
|
+ r = c(-0.6, 0.6),
|
|
|
|
+ AUC = c(-6000, 6000)
|
|
)
|
|
)
|
|
|
|
|
|
- group_vars <- if (plot_type == "reference") c("OrfRep", "Gene", "num") else c("OrfRep", "Gene")
|
|
|
|
-
|
|
|
|
- df_filtered <- df %>%
|
|
|
|
- mutate(OrfRepCombined = if (plot_type == "reference") paste(OrfRep, Gene, num, sep = "_") else paste(OrfRep, Gene, sep = "_"))
|
|
|
|
-
|
|
|
|
overall_plot_configs <- list()
|
|
overall_plot_configs <- list()
|
|
delta_plot_configs <- list()
|
|
delta_plot_configs <- list()
|
|
|
|
|
|
- # Overall plots
|
|
|
|
|
|
+ # Overall plots with lm_line for each interaction
|
|
for (var in names(limits_map)) {
|
|
for (var in names(limits_map)) {
|
|
y_limits <- limits_map[[var]]
|
|
y_limits <- limits_map[[var]]
|
|
|
|
+
|
|
|
|
+ # Use the pre-calculated lm intercept and slope from the dataframe
|
|
|
|
+ lm_intercept_col <- paste0("lm_intercept_", var)
|
|
|
|
+ lm_slope_col <- paste0("lm_slope_", var)
|
|
|
|
|
|
plot_config <- list(
|
|
plot_config <- list(
|
|
- df = df_filtered,
|
|
|
|
|
|
+ df = df,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
x_var = "conc_num_factor_factor",
|
|
x_var = "conc_num_factor_factor",
|
|
y_var = var,
|
|
y_var = var,
|
|
- x_label = unique(df_filtered$Drug)[1],
|
|
|
|
|
|
+ x_label = unique(df$Drug)[1],
|
|
title = sprintf("Scatter RF for %s with SD", var),
|
|
title = sprintf("Scatter RF for %s with SD", var),
|
|
coord_cartesian = y_limits,
|
|
coord_cartesian = y_limits,
|
|
error_bar = TRUE,
|
|
error_bar = TRUE,
|
|
- x_breaks = unique(df_filtered$conc_num_factor_factor),
|
|
|
|
- x_labels = as.character(unique(df_filtered$conc_num)),
|
|
|
|
|
|
+ x_breaks = unique(df$conc_num_factor_factor),
|
|
|
|
+ x_labels = as.character(unique(df$conc_num)),
|
|
position = "jitter",
|
|
position = "jitter",
|
|
- smooth = TRUE
|
|
|
|
|
|
+ smooth = TRUE,
|
|
|
|
+ lm_line = list(
|
|
|
|
+ intercept = mean(df[[lm_intercept_col]], na.rm = TRUE),
|
|
|
|
+ slope = mean(df[[lm_slope_col]], na.rm = TRUE)
|
|
|
|
+ )
|
|
)
|
|
)
|
|
overall_plot_configs <- append(overall_plot_configs, list(plot_config))
|
|
overall_plot_configs <- append(overall_plot_configs, list(plot_config))
|
|
}
|
|
}
|
|
|
|
|
|
- # Delta plots
|
|
|
|
- unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct()
|
|
|
|
|
|
+ # Delta plots (add lm_line if necessary)
|
|
|
|
+ unique_groups <- df %>% select(all_of(group_vars)) %>% distinct()
|
|
|
|
|
|
for (i in seq_len(nrow(unique_groups))) {
|
|
for (i in seq_len(nrow(unique_groups))) {
|
|
group <- unique_groups[i, ]
|
|
group <- unique_groups[i, ]
|
|
- group_data <- df_filtered %>% semi_join(group, by = group_vars)
|
|
|
|
|
|
+ group_data <- df %>% semi_join(group, by = group_vars)
|
|
|
|
|
|
OrfRep <- as.character(group$OrfRep)
|
|
OrfRep <- as.character(group$OrfRep)
|
|
Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
|
|
Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
|
|
@@ -770,13 +794,12 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") {
|
|
y_limits <- delta_limits_map[[var]]
|
|
y_limits <- delta_limits_map[[var]]
|
|
y_span <- y_limits[2] - y_limits[1]
|
|
y_span <- y_limits[2] - y_limits[1]
|
|
|
|
|
|
- WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var))
|
|
|
|
- WT_sd_value <- group_data[[WT_sd_var]][1]
|
|
|
|
- error_bar_ymin <- 0 - (2 * WT_sd_value)
|
|
|
|
- error_bar_ymax <- 0 + (2 * WT_sd_value)
|
|
|
|
|
|
+ # For error bars
|
|
|
|
+ WT_sd_value <- group_data[[paste0("WT_sd_", var)]][1]
|
|
|
|
|
|
- Z_Shift_value <- round(group_data[[paste0("Z_Shift_", sub("Delta_", "", var))]][1], 2)
|
|
|
|
- Z_lm_value <- round(group_data[[paste0("Z_lm_", sub("Delta_", "", var))]][1], 2)
|
|
|
|
|
|
+ Z_Shift_value <- round(group_data[[paste0("Z_Shift_", var)]][1], 2)
|
|
|
|
+ Z_lm_value <- round(group_data[[paste0("Z_lm_", var)]][1], 2)
|
|
|
|
+ R_squared_value <- round(group_data[[paste0("R_squared_", var)]][1], 2)
|
|
NG_value <- group_data$NG[1]
|
|
NG_value <- group_data$NG[1]
|
|
DB_value <- group_data$DB[1]
|
|
DB_value <- group_data$DB[1]
|
|
SM_value <- group_data$SM[1]
|
|
SM_value <- group_data$SM[1]
|
|
@@ -784,37 +807,48 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") {
|
|
annotations <- list(
|
|
annotations <- list(
|
|
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
|
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
|
list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
|
list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
|
|
|
+ list(x = 1, y = y_limits[2] - 0.4 * y_span, label = paste("R-squared =", R_squared_value)),
|
|
list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)),
|
|
list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)),
|
|
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)),
|
|
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)),
|
|
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
|
|
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
|
|
)
|
|
)
|
|
|
|
|
|
|
|
+ # Delta plot configuration with lm_line if needed
|
|
plot_config <- list(
|
|
plot_config <- list(
|
|
df = group_data,
|
|
df = group_data,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
x_var = "conc_num_factor_factor",
|
|
x_var = "conc_num_factor_factor",
|
|
y_var = var,
|
|
y_var = var,
|
|
x_label = unique(group_data$Drug)[1],
|
|
x_label = unique(group_data$Drug)[1],
|
|
- title = paste(OrfRep, Gene, sep = " "),
|
|
|
|
|
|
+ title = paste(OrfRepCombined, Gene, sep = " "),
|
|
coord_cartesian = y_limits,
|
|
coord_cartesian = y_limits,
|
|
annotations = annotations,
|
|
annotations = annotations,
|
|
error_bar = TRUE,
|
|
error_bar = TRUE,
|
|
error_bar_params = list(
|
|
error_bar_params = list(
|
|
- ymin = error_bar_ymin,
|
|
|
|
- ymax = error_bar_ymax
|
|
|
|
|
|
+ ymin = 0 - (2 * WT_sd_value),
|
|
|
|
+ ymax = 0 + (2 * WT_sd_value)
|
|
),
|
|
),
|
|
smooth = TRUE,
|
|
smooth = TRUE,
|
|
x_breaks = unique(group_data$conc_num_factor_factor),
|
|
x_breaks = unique(group_data$conc_num_factor_factor),
|
|
x_labels = as.character(unique(group_data$conc_num)),
|
|
x_labels = as.character(unique(group_data$conc_num)),
|
|
- ylim_vals = y_limits
|
|
|
|
|
|
+ ylim_vals = y_limits,
|
|
|
|
+ lm_line = list(
|
|
|
|
+ intercept = group_data[[lm_intercept_col]][1],
|
|
|
|
+ slope = group_data[[lm_slope_col]][1]
|
|
|
|
+ )
|
|
)
|
|
)
|
|
delta_plot_configs <- append(delta_plot_configs, list(plot_config))
|
|
delta_plot_configs <- append(delta_plot_configs, list(plot_config))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ # Calculate dynamic grid layout based on the number of plots for the delta_L plots
|
|
|
|
+ grid_ncol <- 4
|
|
|
|
+ num_plots <- length(delta_plot_configs)
|
|
|
|
+ grid_nrow <- ceiling(num_plots / grid_ncol)
|
|
|
|
+
|
|
return(list(
|
|
return(list(
|
|
list(grid_layout = list(ncol = 2, nrow = 2), plots = overall_plot_configs),
|
|
list(grid_layout = list(ncol = 2, nrow = 2), plots = overall_plot_configs),
|
|
- list(grid_layout = list(ncol = 4, nrow = 3), plots = delta_plot_configs)
|
|
|
|
|
|
+ list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs)
|
|
))
|
|
))
|
|
}
|
|
}
|
|
|
|
|
|
@@ -902,44 +936,66 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|
return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
|
|
return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
|
|
}
|
|
}
|
|
|
|
|
|
-generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
|
|
|
|
|
+generate_correlation_plot_configs <- function(df, correlation_stats) {
|
|
|
|
+ # Define relationships for different-variable correlations
|
|
relationships <- list(
|
|
relationships <- list(
|
|
- list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"),
|
|
|
|
- list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"),
|
|
|
|
- list(x = "Z_lm_L", y = "Z_lm_AUC", label = "Interaction L vs. Interaction AUC"),
|
|
|
|
- list(x = "Z_lm_K", y = "Z_lm_r", label = "Interaction K vs. Interaction r"),
|
|
|
|
- list(x = "Z_lm_K", y = "Z_lm_AUC", label = "Interaction K vs. Interaction AUC"),
|
|
|
|
- list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC")
|
|
|
|
|
|
+ list(x = "L", y = "K"),
|
|
|
|
+ list(x = "L", y = "r"),
|
|
|
|
+ list(x = "L", y = "AUC"),
|
|
|
|
+ list(x = "K", y = "r"),
|
|
|
|
+ list(x = "K", y = "AUC"),
|
|
|
|
+ list(x = "r", y = "AUC")
|
|
)
|
|
)
|
|
|
|
|
|
plot_configs <- list()
|
|
plot_configs <- list()
|
|
|
|
|
|
- for (rel in relationships) {
|
|
|
|
- lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df)
|
|
|
|
- r_squared <- summary(lm_model)$r.squared
|
|
|
|
|
|
+ # Iterate over the option to highlight cyan points (TRUE/FALSE)
|
|
|
|
+ highlight_cyan_options <- c(FALSE, TRUE)
|
|
|
|
|
|
- plot_config <- list(
|
|
|
|
- df = df,
|
|
|
|
- x_var = rel$x,
|
|
|
|
- y_var = rel$y,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = rel$label,
|
|
|
|
- annotations = list(
|
|
|
|
- list(
|
|
|
|
- x = mean(df[[rel$x]], na.rm = TRUE),
|
|
|
|
- y = mean(df[[rel$y]], na.rm = TRUE),
|
|
|
|
- label = paste("R-squared =", round(r_squared, 3)))
|
|
|
|
- ),
|
|
|
|
- smooth = TRUE,
|
|
|
|
- smooth_color = "tomato3",
|
|
|
|
- lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]),
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.5,
|
|
|
|
- color_var = "Overlap",
|
|
|
|
- cyan_points = highlight_cyan
|
|
|
|
- )
|
|
|
|
|
|
+ for (highlight_cyan in highlight_cyan_options) {
|
|
|
|
+ for (rel in relationships) {
|
|
|
|
+ # Extract relevant variable names for Z_lm values
|
|
|
|
+ x_var <- paste0("Z_lm_", rel$x)
|
|
|
|
+ y_var <- paste0("Z_lm_", rel$y)
|
|
|
|
+
|
|
|
|
+ # Access the correlation statistics from the correlation_stats list
|
|
|
|
+ relationship_name <- paste0(rel$x, "_vs_", rel$y) # Example: L_vs_K
|
|
|
|
+ stats <- correlation_stats[[relationship_name]]
|
|
|
|
+ intercept <- stats$intercept
|
|
|
|
+ slope <- stats$slope
|
|
|
|
+ r_squared <- stats$r_squared
|
|
|
|
|
|
- plot_configs <- append(plot_configs, list(plot_config))
|
|
|
|
|
|
+ # Generate the label for the plot
|
|
|
|
+ plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
|
|
|
|
+
|
|
|
|
+ # Construct plot config
|
|
|
|
+ plot_config <- list(
|
|
|
|
+ df = df,
|
|
|
|
+ x_var = x_var,
|
|
|
|
+ y_var = y_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = plot_label,
|
|
|
|
+ annotations = list(
|
|
|
|
+ list(
|
|
|
|
+ x = mean(df[[x_var]], na.rm = TRUE),
|
|
|
|
+ y = mean(df[[y_var]], na.rm = TRUE),
|
|
|
|
+ label = paste("R-squared =", round(r_squared, 3))
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ smooth = TRUE,
|
|
|
|
+ smooth_color = "tomato3",
|
|
|
|
+ lm_line = list(
|
|
|
|
+ intercept = intercept,
|
|
|
|
+ slope = slope
|
|
|
|
+ ),
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.5,
|
|
|
|
+ color_var = "Overlap",
|
|
|
|
+ cyan_points = highlight_cyan # Include cyan points or not based on the loop
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ plot_configs <- append(plot_configs, list(plot_config))
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
return(list(plots = plot_configs))
|
|
return(list(plots = plot_configs))
|
|
@@ -1041,7 +1097,7 @@ main <- function() {
|
|
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
|
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
|
row.names = FALSE)
|
|
row.names = FALSE)
|
|
|
|
|
|
- # Each plots list corresponds to a file
|
|
|
|
|
|
+ # Each list of plots corresponds to a file
|
|
l_vs_k_plot_configs <- list(
|
|
l_vs_k_plot_configs <- list(
|
|
plots = list(
|
|
plots = list(
|
|
list(
|
|
list(
|
|
@@ -1147,7 +1203,7 @@ main <- function() {
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
|
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
|
color_var = "conc_num_factor_factor",
|
|
color_var = "conc_num_factor_factor",
|
|
- position = "jitter", # Apply jitter for better visibility
|
|
|
|
|
|
+ position = "jitter",
|
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
|
annotations = list(
|
|
annotations = list(
|
|
list(
|
|
list(
|
|
@@ -1171,7 +1227,7 @@ main <- function() {
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
|
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
|
color_var = "conc_num_factor_factor",
|
|
color_var = "conc_num_factor_factor",
|
|
- position = "jitter", # Apply jitter for better visibility
|
|
|
|
|
|
+ position = "jitter",
|
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
|
annotations = list(
|
|
annotations = list(
|
|
list(
|
|
list(
|
|
@@ -1187,7 +1243,7 @@ main <- function() {
|
|
)
|
|
)
|
|
|
|
|
|
message("Generating quality control plots in parallel")
|
|
message("Generating quality control plots in parallel")
|
|
- # # future::plan(future::multicore, workers = parallel::detectCores())
|
|
|
|
|
|
+ # future::plan(future::multicore, workers = parallel::detectCores())
|
|
future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
|
future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
|
|
|
|
|
plot_configs <- list(
|
|
plot_configs <- list(
|
|
@@ -1298,11 +1354,11 @@ main <- function() {
|
|
|
|
|
|
# Create interaction plots
|
|
# Create interaction plots
|
|
message("Generating reference interaction plots")
|
|
message("Generating reference interaction plots")
|
|
- reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, plot_type = "reference")
|
|
|
|
|
|
+ reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, "reference")
|
|
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
|
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
|
|
|
|
|
message("Generating deletion interaction plots")
|
|
message("Generating deletion interaction plots")
|
|
- deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, plot_type = "deletion")
|
|
|
|
|
|
+ deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, "deletion")
|
|
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
|
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
|
|
|
|
|
# Define conditions for enhancers and suppressors
|
|
# Define conditions for enhancers and suppressors
|
|
@@ -1372,29 +1428,6 @@ main <- function() {
|
|
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
|
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
|
plot_configs = rank_lm_plot_configs)
|
|
plot_configs = rank_lm_plot_configs)
|
|
|
|
|
|
- message("Filtering and reranking plots")
|
|
|
|
- interaction_threshold <- 2 # TODO add to study config?
|
|
|
|
- # Formerly X_NArm
|
|
|
|
- zscore_interactions_filtered <- zscore_interactions_joined %>%
|
|
|
|
- filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
|
|
|
|
- mutate(
|
|
|
|
- Overlap = case_when(
|
|
|
|
- Z_lm_L >= interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Enhancer Both",
|
|
|
|
- Z_lm_L <= -interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Suppressor Both",
|
|
|
|
- Z_lm_L >= interaction_threshold & Avg_Zscore_L <= interaction_threshold ~ "Deletion Enhancer lm only",
|
|
|
|
- Z_lm_L <= interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Enhancer Avg Zscore only",
|
|
|
|
- Z_lm_L <= -interaction_threshold & Avg_Zscore_L >= -interaction_threshold ~ "Deletion Suppressor lm only",
|
|
|
|
- Z_lm_L >= -interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Suppressor Avg Zscore only",
|
|
|
|
- Z_lm_L >= interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
|
|
|
|
- Z_lm_L <= -interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
|
|
|
- TRUE ~ "No Effect"
|
|
|
|
- ),
|
|
|
|
- lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
|
|
|
- lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
|
|
|
- lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
|
|
|
- lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
message("Generating filtered ranked plots")
|
|
message("Generating filtered ranked plots")
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
df = zscore_interactions_filtered,
|
|
df = zscore_interactions_filtered,
|
|
@@ -1430,3 +1463,6 @@ main <- function() {
|
|
})
|
|
})
|
|
}
|
|
}
|
|
main()
|
|
main()
|
|
|
|
+
|
|
|
|
+# For future simplification of joined dataframes
|
|
|
|
+# df_joined <- left_join(cleaned_df, summary_stats, by = group_vars, suffix = c("_original", "_stats"))
|