Move correlation modeling to calculate_interction_scores
This commit is contained in:
@@ -214,7 +214,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
|
|
||||||
# Calculate WT statistics from df_bg
|
# Calculate WT statistics from df_bg
|
||||||
wt_stats <- df_bg %>%
|
wt_stats <- df_bg %>%
|
||||||
filter(conc_num == 0) %>% # use the zero drug concentration background
|
filter(conc_num == 0) %>%
|
||||||
summarise(
|
summarise(
|
||||||
WT_L = mean(mean_L, na.rm = TRUE),
|
WT_L = mean(mean_L, na.rm = TRUE),
|
||||||
WT_sd_L = mean(sd_L, na.rm = TRUE),
|
WT_sd_L = mean(sd_L, na.rm = TRUE),
|
||||||
@@ -302,31 +302,48 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||||
) %>%
|
) %>%
|
||||||
group_modify(~ {
|
group_modify(~ {
|
||||||
# Perform linear models
|
# Perform linear models only if there are enough unique conc_num_factor levels
|
||||||
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
|
if (length(unique(.x$conc_num_factor)) > 1) {
|
||||||
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
|
|
||||||
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
|
|
||||||
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
|
|
||||||
|
|
||||||
.x %>%
|
# Filter and calculate each lm() separately with individual checks for NAs
|
||||||
mutate(
|
lm_L <- if (!all(is.na(.x$Delta_L))) tryCatch(lm(Delta_L ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL
|
||||||
lm_intercept_L = coef(lm_L)[1],
|
lm_K <- if (!all(is.na(.x$Delta_K))) tryCatch(lm(Delta_K ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL
|
||||||
lm_slope_L = coef(lm_L)[2],
|
lm_r <- if (!all(is.na(.x$Delta_r))) tryCatch(lm(Delta_r ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL
|
||||||
R_Squared_L = summary(lm_L)$r.squared,
|
lm_AUC <- if (!all(is.na(.x$Delta_AUC))) tryCatch(lm(Delta_AUC ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL
|
||||||
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
|
||||||
lm_intercept_K = coef(lm_K)[1],
|
# Mutate results for each lm if it was successfully calculated, suppress warnings for perfect fits
|
||||||
lm_slope_K = coef(lm_K)[2],
|
.x %>%
|
||||||
R_Squared_K = summary(lm_K)$r.squared,
|
mutate(
|
||||||
lm_Score_K = max_conc * lm_slope_K + lm_intercept_K,
|
lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
|
||||||
lm_intercept_r = coef(lm_r)[1],
|
lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
|
||||||
lm_slope_r = coef(lm_r)[2],
|
R_Squared_L = if (!is.null(lm_L)) suppressWarnings(summary(lm_L)$r.squared) else NA,
|
||||||
R_Squared_r = summary(lm_r)$r.squared,
|
lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
|
||||||
lm_Score_r = max_conc * lm_slope_r + lm_intercept_r,
|
|
||||||
lm_intercept_AUC = coef(lm_AUC)[1],
|
lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
|
||||||
lm_slope_AUC = coef(lm_AUC)[2],
|
lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
|
||||||
R_Squared_AUC = summary(lm_AUC)$r.squared,
|
R_Squared_K = if (!is.null(lm_K)) suppressWarnings(summary(lm_K)$r.squared) else NA,
|
||||||
lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
|
lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
|
||||||
)
|
|
||||||
|
lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
|
||||||
|
lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
|
||||||
|
R_Squared_r = if (!is.null(lm_r)) suppressWarnings(summary(lm_r)$r.squared) else NA,
|
||||||
|
lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
|
||||||
|
|
||||||
|
lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
|
||||||
|
lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
|
||||||
|
R_Squared_AUC = if (!is.null(lm_AUC)) suppressWarnings(summary(lm_AUC)$r.squared) else NA,
|
||||||
|
lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
# If not enough conc_num_factor levels, set lm-related values to NA
|
||||||
|
.x %>%
|
||||||
|
mutate(
|
||||||
|
lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA,
|
||||||
|
lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA,
|
||||||
|
lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA,
|
||||||
|
lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA
|
||||||
|
)
|
||||||
|
}
|
||||||
}) %>%
|
}) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
@@ -344,6 +361,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
.groups = "drop"
|
.groups = "drop"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add lm score means and standard deviations to calculations
|
||||||
calculations <- calculations %>%
|
calculations <- calculations %>%
|
||||||
mutate(
|
mutate(
|
||||||
lm_mean_L = lm_means_sds$lm_mean_L,
|
lm_mean_L = lm_means_sds$lm_mean_L,
|
||||||
@@ -356,7 +374,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
lm_sd_AUC = lm_means_sds$lm_sd_AUC
|
lm_sd_AUC = lm_means_sds$lm_sd_AUC
|
||||||
)
|
)
|
||||||
|
|
||||||
# Continue with gene Z-scores and interactions
|
# Calculate Z-lm scores
|
||||||
calculations <- calculations %>%
|
calculations <- calculations %>%
|
||||||
mutate(
|
mutate(
|
||||||
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
|
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
|
||||||
@@ -406,12 +424,34 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
.groups = "drop"
|
.groups = "drop"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add overlap threshold categories based on Z-lm and Avg-Z scores
|
||||||
|
interactions <- interactions %>%
|
||||||
|
filter(!is.na(Z_lm_L)) %>%
|
||||||
|
mutate(
|
||||||
|
Overlap = case_when(
|
||||||
|
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
||||||
|
Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
|
||||||
|
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
|
||||||
|
Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
|
||||||
|
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
|
||||||
|
Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
|
||||||
|
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore",
|
||||||
|
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
||||||
|
TRUE ~ "No Effect"
|
||||||
|
),
|
||||||
|
|
||||||
|
# For correlations
|
||||||
|
lm_R_squared_L = if (!all(is.na(Z_lm_L)) && !all(is.na(Avg_Zscore_L))) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
|
||||||
|
lm_R_squared_K = if (!all(is.na(Z_lm_K)) && !all(is.na(Avg_Zscore_K))) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
|
||||||
|
lm_R_squared_r = if (!all(is.na(Z_lm_r)) && !all(is.na(Avg_Zscore_r))) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA,
|
||||||
|
lm_R_squared_AUC = if (!all(is.na(Z_lm_AUC)) && !all(is.na(Avg_Zscore_AUC))) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA
|
||||||
|
)
|
||||||
|
|
||||||
# Creating the final calculations and interactions dataframes with only required columns for csv output
|
# Creating the final calculations and interactions dataframes with only required columns for csv output
|
||||||
calculations_df <- calculations %>%
|
calculations_df <- calculations %>%
|
||||||
select(
|
select(
|
||||||
all_of(group_vars),
|
all_of(group_vars),
|
||||||
conc_num, conc_num_factor, conc_num_factor_factor,
|
conc_num, conc_num_factor, conc_num_factor_factor, N,
|
||||||
N, NG, DB, SM,
|
|
||||||
mean_L, median_L, sd_L, se_L,
|
mean_L, median_L, sd_L, se_L,
|
||||||
mean_K, median_K, sd_K, se_K,
|
mean_K, median_K, sd_K, se_K,
|
||||||
mean_r, median_r, sd_r, se_r,
|
mean_r, median_r, sd_r, se_r,
|
||||||
@@ -432,23 +472,22 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
|||||||
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
||||||
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
||||||
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
||||||
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
|
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
||||||
|
lm_R_squared_L, lm_R_squared_K, lm_R_squared_r, lm_R_squared_AUC,
|
||||||
|
Overlap
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Join calculations and interactions to avoid dimension mismatch
|
||||||
calculations_no_overlap <- calculations %>%
|
calculations_no_overlap <- calculations %>%
|
||||||
# DB, NG, SM are same as in interactions, the rest may be different and need to be checked
|
select(-any_of(c("DB", "NG", "SM",
|
||||||
select(-any_of(c(
|
|
||||||
"DB", "NG", "SM",
|
|
||||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||||
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
|
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC")))
|
||||||
)))
|
|
||||||
|
|
||||||
# Use left_join to avoid dimension mismatch issues
|
|
||||||
full_data <- calculations_no_overlap %>%
|
full_data <- calculations_no_overlap %>%
|
||||||
left_join(interactions, by = group_vars)
|
left_join(interactions_df, by = group_vars)
|
||||||
|
|
||||||
# Return full_data and the two required dataframes (calculations and interactions)
|
# Return final dataframes
|
||||||
return(list(
|
return(list(
|
||||||
calculations = calculations_df,
|
calculations = calculations_df,
|
||||||
interactions = interactions_df,
|
interactions = interactions_df,
|
||||||
@@ -794,7 +833,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
stats_boxplot_configs <- list()
|
stats_boxplot_configs <- list()
|
||||||
delta_plot_configs <- list()
|
delta_plot_configs <- list()
|
||||||
|
|
||||||
# Overall statistics plots (use df)
|
# Overall statistics plots
|
||||||
OrfRep <- first(df$OrfRep) # this should correspond to the reference strain
|
OrfRep <- first(df$OrfRep) # this should correspond to the reference strain
|
||||||
|
|
||||||
for (plot_type in c("scatter", "box")) {
|
for (plot_type in c("scatter", "box")) {
|
||||||
@@ -828,24 +867,23 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
)
|
)
|
||||||
plot_config$position <- "jitter"
|
plot_config$position <- "jitter"
|
||||||
|
|
||||||
# Annotation labels
|
annotations <- list(
|
||||||
annotations <- list(
|
list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = " NG ="), # Slightly above y-min
|
||||||
list(x = 0, y = y_limits[1] + 0.1 * y_span, label = "NG ="),
|
list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = " DB ="),
|
||||||
list(x = 0, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
|
list(x = 0.25, y = y_limits[1], label = " SM =")
|
||||||
list(x = 0, y = y_limits[1], label = "SM =")
|
)
|
||||||
)
|
|
||||||
|
|
||||||
# Annotation values
|
# Loop over unique x values and add NG, DB, SM values at calculated y positions
|
||||||
for (x_val in unique(df$conc_num_factor_factor)) {
|
for (x_val in unique(df$conc_num_factor_factor)) {
|
||||||
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
|
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
|
||||||
annotations <- append(annotations, list(
|
annotations <- append(annotations, list(
|
||||||
list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = sum(current_df$NG, na.rm = TRUE)),
|
list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)),
|
||||||
list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = sum(current_df$DB, na.rm = TRUE)),
|
list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)),
|
||||||
list(x = x_val, y = y_limits[1], label = sum(current_df$SM, na.rm = TRUE))
|
list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0))
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
plot_config$annotations <- annotations
|
plot_config$annotations <- annotations
|
||||||
|
|
||||||
# Append to scatter plot configurations
|
# Append to scatter plot configurations
|
||||||
stats_plot_configs <- append(stats_plot_configs, list(plot_config))
|
stats_plot_configs <- append(stats_plot_configs, list(plot_config))
|
||||||
@@ -861,6 +899,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Delta interaction plots
|
||||||
if (type == "reference") {
|
if (type == "reference") {
|
||||||
group_vars <- c("OrfRep", "Gene", "num")
|
group_vars <- c("OrfRep", "Gene", "num")
|
||||||
} else if (type == "deletion") {
|
} else if (type == "deletion") {
|
||||||
@@ -874,7 +913,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
AUC = c(-6000, 6000)
|
AUC = c(-6000, 6000)
|
||||||
)
|
)
|
||||||
|
|
||||||
grouped_data <- df_calculations %>%
|
grouped_data <- df %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
group_split()
|
group_split()
|
||||||
|
|
||||||
@@ -889,11 +928,6 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
OrfRepTitle <- OrfRep
|
OrfRepTitle <- OrfRep
|
||||||
}
|
}
|
||||||
|
|
||||||
# Get corresponding interaction row
|
|
||||||
interaction_row <- df_interactions %>%
|
|
||||||
filter(if_all(all_of(group_vars), ~ . == first(.))) %>%
|
|
||||||
slice(1)
|
|
||||||
|
|
||||||
for (var in names(delta_limits_map)) {
|
for (var in names(delta_limits_map)) {
|
||||||
y_limits <- delta_limits_map[[var]]
|
y_limits <- delta_limits_map[[var]]
|
||||||
y_span <- y_limits[2] - y_limits[1]
|
y_span <- y_limits[2] - y_limits[1]
|
||||||
@@ -901,17 +935,17 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
# Error bars
|
# Error bars
|
||||||
WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
|
WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
|
||||||
|
|
||||||
# Z_Shift and lm values from interaction_row
|
# Z_Shift and lm values
|
||||||
Z_Shift_value <- round(first(interaction_row[[paste0("Z_Shift_", var)]], default = 0), 2)
|
Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2)
|
||||||
Z_lm_value <- round(first(interaction_row[[paste0("Z_lm_", var)]], default = 0), 2)
|
Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2)
|
||||||
R_squared_value <- round(first(interaction_row[[paste0("R_Squared_", var)]], default = 0), 2)
|
R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2)
|
||||||
|
|
||||||
# NG, DB, SM values from interaction_row
|
# NG, DB, SM values
|
||||||
NG_value <- first(interaction_row$NG, default = 0)
|
NG_value <- first(group_data$NG, default = 0)
|
||||||
DB_value <- first(interaction_row$DB, default = 0)
|
DB_value <- first(group_data$DB, default = 0)
|
||||||
SM_value <- first(interaction_row$SM, default = 0)
|
SM_value <- first(group_data$SM, default = 0)
|
||||||
|
|
||||||
# Use the pre-calculated lm intercept and slope from group_data
|
# Use the pre-calculated lm intercept and slope from the dataframe
|
||||||
lm_intercept_col <- paste0("lm_intercept_", var)
|
lm_intercept_col <- paste0("lm_intercept_", var)
|
||||||
lm_slope_col <- paste0("lm_slope_", var)
|
lm_slope_col <- paste0("lm_slope_", var)
|
||||||
lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
|
lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
|
||||||
@@ -935,9 +969,8 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
),
|
),
|
||||||
error_bar = TRUE,
|
error_bar = TRUE,
|
||||||
error_bar_params = list(
|
error_bar_params = list(
|
||||||
# Passing constants directly
|
ymin = 0 - (2 * WT_sd_value),
|
||||||
ymin = -2 * WT_sd_value,
|
ymax = 0 + (2 * WT_sd_value),
|
||||||
ymax = 2 * WT_sd_value,
|
|
||||||
color = "black"
|
color = "black"
|
||||||
),
|
),
|
||||||
smooth = TRUE,
|
smooth = TRUE,
|
||||||
@@ -961,36 +994,34 @@ generate_interaction_plot_configs <- function(df, type) {
|
|||||||
return(list(
|
return(list(
|
||||||
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs),
|
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs),
|
||||||
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs),
|
list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs),
|
||||||
list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = delta_plot_configs)
|
list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs)
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
|
generate_rank_plot_configs <- function(df, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
|
||||||
|
|
||||||
plot_configs <- list()
|
|
||||||
sd_bands <- c(1, 2, 3)
|
sd_bands <- c(1, 2, 3)
|
||||||
|
plot_configs <- list()
|
||||||
|
|
||||||
|
variables <- c("L", "K")
|
||||||
|
|
||||||
# Adjust (if necessary) and rank columns
|
# Adjust (if necessary) and rank columns
|
||||||
variables <- c("L", "K")
|
|
||||||
for (variable in variables) {
|
for (variable in variables) {
|
||||||
if (adjust) {
|
if (adjust) {
|
||||||
df_interactions[[paste0("Avg_Zscore_", variable)]] <-
|
df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]])
|
||||||
ifelse(is.na(df_interactions[[paste0("Avg_Zscore_", variable)]]), 0.001, df_interactions[[paste0("Avg_Zscore_", variable)]])
|
df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]])
|
||||||
df_interactions[[paste0("Z_lm_", variable)]] <-
|
|
||||||
ifelse(is.na(df_interactions[[paste0("Z_lm_", variable)]]), 0.001, df_interactions[[paste0("Z_lm_", variable)]])
|
|
||||||
}
|
}
|
||||||
df_interactions[[paste0("Rank_", variable)]] <- rank(df_interactions[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
|
df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep")
|
||||||
df_interactions[[paste0("Rank_lm_", variable)]] <- rank(df_interactions[[paste0("Z_lm_", variable)]], na.last = "keep")
|
df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep")
|
||||||
}
|
}
|
||||||
|
|
||||||
# Helper function to create a plot configuration
|
# Helper function to create a plot configuration
|
||||||
create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) {
|
create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) {
|
||||||
num_enhancers <- sum(df_interactions[[zscore_var]] >= sd_band, na.rm = TRUE)
|
num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||||
num_suppressors <- sum(df_interactions[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
num_suppressors <- sum(df[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||||
|
|
||||||
# Default plot config
|
# Default plot config
|
||||||
plot_config <- list(
|
plot_config <- list(
|
||||||
df = df_interactions,
|
df = df,
|
||||||
x_var = rank_var,
|
x_var = rank_var,
|
||||||
y_var = zscore_var,
|
y_var = zscore_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
@@ -1012,13 +1043,13 @@ generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust =
|
|||||||
# Add specific annotations for plots with annotations
|
# Add specific annotations for plots with annotations
|
||||||
plot_config$annotations <- list(
|
plot_config$annotations <- list(
|
||||||
list(
|
list(
|
||||||
x = median(df_interactions[[rank_var]], na.rm = TRUE),
|
x = median(df[[rank_var]], na.rm = TRUE),
|
||||||
y = max(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
|
y = max(df[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||||
label = paste("Deletion Enhancers =", num_enhancers)
|
label = paste("Deletion Enhancers =", num_enhancers)
|
||||||
),
|
),
|
||||||
list(
|
list(
|
||||||
x = median(df_interactions[[rank_var]], na.rm = TRUE),
|
x = median(df[[rank_var]], na.rm = TRUE),
|
||||||
y = min(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9,
|
y = min(df[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||||
label = paste("Deletion Suppressors =", num_suppressors)
|
label = paste("Deletion Suppressors =", num_suppressors)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -1051,7 +1082,7 @@ generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust =
|
|||||||
return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
|
return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_correlation_plot_configs <- function(df_interactions) {
|
generate_correlation_plot_configs <- function(df, correlation_stats) {
|
||||||
# Define relationships for different-variable correlations
|
# Define relationships for different-variable correlations
|
||||||
relationships <- list(
|
relationships <- list(
|
||||||
list(x = "L", y = "K"),
|
list(x = "L", y = "K"),
|
||||||
@@ -1062,23 +1093,6 @@ generate_correlation_plot_configs <- function(df_interactions) {
|
|||||||
list(x = "r", y = "AUC")
|
list(x = "r", y = "AUC")
|
||||||
)
|
)
|
||||||
|
|
||||||
correlation_stats <- list()
|
|
||||||
|
|
||||||
for (rel in relationships) {
|
|
||||||
x_var <- paste0("Z_lm_", rel$x)
|
|
||||||
y_var <- paste0("Z_lm_", rel$y)
|
|
||||||
lm_fit <- lm(df_interactions[[y_var]] ~ df_interactions[[x_var]])
|
|
||||||
intercept <- coef(lm_fit)[1]
|
|
||||||
slope <- coef(lm_fit)[2]
|
|
||||||
r_squared <- summary(lm_fit)$r.squared
|
|
||||||
relationship_name <- paste0(rel$x, "_vs_", rel$y)
|
|
||||||
correlation_stats[[relationship_name]] <- list(
|
|
||||||
intercept = intercept,
|
|
||||||
slope = slope,
|
|
||||||
r_squared = r_squared
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
plot_configs <- list()
|
plot_configs <- list()
|
||||||
|
|
||||||
# Iterate over the option to highlight cyan points (TRUE/FALSE)
|
# Iterate over the option to highlight cyan points (TRUE/FALSE)
|
||||||
@@ -1102,15 +1116,15 @@ generate_correlation_plot_configs <- function(df_interactions) {
|
|||||||
|
|
||||||
# Construct plot config
|
# Construct plot config
|
||||||
plot_config <- list(
|
plot_config <- list(
|
||||||
df = df_interactions,
|
df = df,
|
||||||
x_var = x_var,
|
x_var = x_var,
|
||||||
y_var = y_var,
|
y_var = y_var,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = plot_label,
|
title = plot_label,
|
||||||
annotations = list(
|
annotations = list(
|
||||||
list(
|
list(
|
||||||
x = mean(df_interactions[[x_var]], na.rm = TRUE),
|
x = mean(df[[x_var]], na.rm = TRUE),
|
||||||
y = mean(df_interactions[[y_var]], na.rm = TRUE),
|
y = mean(df[[y_var]], na.rm = TRUE),
|
||||||
label = paste("R-squared =", round(r_squared, 3))
|
label = paste("R-squared =", round(r_squared, 3))
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
@@ -1426,39 +1440,39 @@ main <- function() {
|
|||||||
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||||
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
# message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||||
df_deletion <- df_na_stats %>% # formerly X2
|
# df_deletion <- df_na_stats %>% # formerly X2
|
||||||
filter(OrfRep != strain) %>%
|
# filter(OrfRep != strain) %>%
|
||||||
filter(!is.na(L)) %>%
|
# filter(!is.na(L)) %>%
|
||||||
group_by(OrfRep, Gene, conc_num) %>%
|
# group_by(OrfRep, Gene, conc_num) %>%
|
||||||
mutate(
|
# mutate(
|
||||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
# max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
# L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
|
# SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
|
||||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
# L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||||
ungroup()
|
# ungroup()
|
||||||
|
|
||||||
message("Calculating deletion strain(s) summary statistics")
|
# message("Calculating deletion strain(s) summary statistics")
|
||||||
df_deletion_stats <- calculate_summary_stats(
|
# df_deletion_stats <- calculate_summary_stats(
|
||||||
df = df_deletion,
|
# df = df_deletion,
|
||||||
variables = c("L", "K", "r", "AUC"),
|
# variables = c("L", "K", "r", "AUC"),
|
||||||
group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
|
# group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
# )$df_with_stats
|
||||||
message("Calculating deletion strain(s) interactions scores")
|
|
||||||
results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
|
# message("Calculating deletion strain(s) interactions scores")
|
||||||
df_calculations <- results$calculations
|
# results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
|
||||||
df_interactions <- results$interactions
|
# df_calculations <- results$calculations
|
||||||
df_interactions_joined <- results$full_data
|
# df_interactions <- results$interactions
|
||||||
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
# df_interactions_joined <- results$full_data
|
||||||
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
# write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
||||||
|
# write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Create interaction plots
|
|
||||||
message("Generating reference interaction plots")
|
message("Generating reference interaction plots")
|
||||||
reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, df_bg_stats, "reference")
|
reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, "reference")
|
||||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
||||||
|
|
||||||
message("Generating deletion interaction plots")
|
message("Generating deletion interaction plots")
|
||||||
deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, df_bg_stats, "deletion")
|
deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, "deletion")
|
||||||
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
||||||
|
|
||||||
message("Writing enhancer/suppressor csv files")
|
message("Writing enhancer/suppressor csv files")
|
||||||
@@ -1495,7 +1509,7 @@ main <- function() {
|
|||||||
|
|
||||||
message("Generating rank plots")
|
message("Generating rank plots")
|
||||||
rank_plot_configs <- generate_rank_plot_configs(
|
rank_plot_configs <- generate_rank_plot_configs(
|
||||||
df_interactions,
|
df_interactions_joined,
|
||||||
is_lm = FALSE,
|
is_lm = FALSE,
|
||||||
adjust = TRUE
|
adjust = TRUE
|
||||||
)
|
)
|
||||||
@@ -1504,37 +1518,16 @@ main <- function() {
|
|||||||
|
|
||||||
message("Generating ranked linear model plots")
|
message("Generating ranked linear model plots")
|
||||||
rank_lm_plot_configs <- generate_rank_plot_configs(
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||||
df_interactions,
|
df_interactions_joined,
|
||||||
is_lm = TRUE,
|
is_lm = TRUE,
|
||||||
adjust = TRUE
|
adjust = TRUE
|
||||||
)
|
)
|
||||||
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
||||||
plot_configs = rank_lm_plot_configs)
|
plot_configs = rank_lm_plot_configs)
|
||||||
|
|
||||||
overlap_threshold <- 2 # TODO add to study config?
|
|
||||||
df_interactions_filtered <- df_interactions %>%
|
|
||||||
filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
|
|
||||||
mutate(
|
|
||||||
Overlap = case_when(
|
|
||||||
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
|
||||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
|
|
||||||
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only",
|
|
||||||
Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
|
|
||||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only",
|
|
||||||
Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
|
|
||||||
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
|
|
||||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
|
||||||
TRUE ~ "No Effect"
|
|
||||||
),
|
|
||||||
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
|
||||||
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
|
||||||
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
|
||||||
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
|
||||||
)
|
|
||||||
|
|
||||||
message("Generating filtered ranked plots")
|
message("Generating filtered ranked plots")
|
||||||
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||||
df_interactions_filtered,
|
df_interactions_joined,
|
||||||
is_lm = FALSE,
|
is_lm = FALSE,
|
||||||
adjust = FALSE,
|
adjust = FALSE,
|
||||||
overlap_color = TRUE
|
overlap_color = TRUE
|
||||||
@@ -1546,7 +1539,7 @@ main <- function() {
|
|||||||
|
|
||||||
message("Generating filtered ranked linear model plots")
|
message("Generating filtered ranked linear model plots")
|
||||||
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
||||||
df_interactions_filtered,
|
df_interactions_joined,
|
||||||
is_lm = TRUE,
|
is_lm = TRUE,
|
||||||
adjust = FALSE,
|
adjust = FALSE,
|
||||||
overlap_color = TRUE
|
overlap_color = TRUE
|
||||||
@@ -1558,7 +1551,7 @@ main <- function() {
|
|||||||
|
|
||||||
message("Generating correlation curve parameter pair plots")
|
message("Generating correlation curve parameter pair plots")
|
||||||
correlation_plot_configs <- generate_correlation_plot_configs(
|
correlation_plot_configs <- generate_correlation_plot_configs(
|
||||||
df_interactions_filtered
|
df_interactions_joined
|
||||||
)
|
)
|
||||||
generate_and_save_plots(
|
generate_and_save_plots(
|
||||||
out_dir = out_dir,
|
out_dir = out_dir,
|
||||||
|
|||||||
Reference in New Issue
Block a user