Calculate ranks step-wise

This commit is contained in:
2024-10-08 11:59:24 -04:00
parent d456bcdadd
commit bff7e7cfb4

View File

@@ -222,7 +222,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
perform_lm <- function(response, predictor, max_conc) { perform_lm <- function(response, predictor, max_conc) {
if (all(is.na(response)) || all(is.na(predictor)) || if (all(is.na(response)) || all(is.na(predictor)) ||
length(response[!is.na(response)]) == 0 || length(predictor[!is.na(predictor)]) == 0) { length(response[!is.na(response)]) < 2 || length(predictor[!is.na(predictor)]) < 2) {
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA)) return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
} else { } else {
fit <- lm(response ~ predictor) fit <- lm(response ~ predictor)
@@ -337,53 +337,55 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1), Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1), Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1),
# Perform gene-gene linear modeling # Perform per-gene linear modeling
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)), lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)), lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)), lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)), lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
# Extract coefficients and statistics for each model
lm_intercept_L = lm_L[[1]]$intercept,
lm_slope_L = lm_L[[1]]$slope,
R_Squared_L = lm_L[[1]]$r_squared,
lm_Score_L = lm_L[[1]]$score,
lm_intercept_K = lm_K[[1]]$intercept,
lm_slope_K = lm_K[[1]]$slope,
R_Squared_K = lm_K[[1]]$r_squared,
lm_Score_K = lm_K[[1]]$score,
lm_intercept_r = lm_r[[1]]$intercept,
lm_slope_r = lm_r[[1]]$slope,
R_Squared_r = lm_r[[1]]$r_squared,
lm_Score_r = lm_r[[1]]$score,
lm_intercept_AUC = lm_AUC[[1]]$intercept,
lm_slope_AUC = lm_AUC[[1]]$slope,
R_Squared_AUC = lm_AUC[[1]]$r_squared,
lm_Score_AUC = lm_AUC[[1]]$score,
.groups = "drop" .groups = "drop"
) %>% ) %>%
select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we have coefficients
# Summary statistics for lm scores
interactions <- interactions %>%
group_by(across(all_of(group_vars))) %>%
mutate( mutate(
lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_intercept_L = map_dbl(lm_L, "intercept"),
lm_sd_L = sd(lm_Score_L, na.rm = TRUE), lm_slope_L = map_dbl(lm_L, "slope"),
lm_mean_K = mean(lm_Score_K, na.rm = TRUE), R_Squared_L = map_dbl(lm_L, "r_squared"),
lm_sd_K = sd(lm_Score_K, na.rm = TRUE), lm_Score_L = map_dbl(lm_L, "score"),
lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
lm_sd_r = sd(lm_Score_r, na.rm = TRUE), lm_intercept_K = map_dbl(lm_K, "intercept"),
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), lm_slope_K = map_dbl(lm_K, "slope"),
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE), R_Squared_K = map_dbl(lm_K, "r_squared"),
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, lm_Score_K = map_dbl(lm_K, "score"),
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r, lm_intercept_r = map_dbl(lm_r, "intercept"),
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC lm_slope_r = map_dbl(lm_r, "slope"),
R_Squared_r = map_dbl(lm_r, "r_squared"),
lm_Score_r = map_dbl(lm_r, "score"),
lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
lm_slope_AUC = map_dbl(lm_AUC, "slope"),
R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
lm_Score_AUC = map_dbl(lm_AUC, "score")
) %>%
select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we now have coefficients
lm_mean_L <- mean(interactions$lm_Score_L, na.rm = TRUE)
lm_sd_L <- sd(interactions$lm_Score_L, na.rm = TRUE)
lm_mean_K <- mean(interactions$lm_Score_K, na.rm = TRUE)
lm_sd_K <- sd(interactions$lm_Score_K, na.rm = TRUE)
lm_mean_r <- mean(interactions$lm_Score_r, na.rm = TRUE)
lm_sd_r <- sd(interactions$lm_Score_r, na.rm = TRUE)
lm_mean_AUC <- mean(interactions$lm_Score_AUC, na.rm = TRUE)
lm_sd_AUC <- sd(interactions$lm_Score_AUC, na.rm = TRUE)
interactions <- interactions %>%
mutate(
Z_lm_L = if (!is.na(lm_sd_L) && lm_sd_L != 0) (lm_Score_L - lm_mean_L) / lm_sd_L else NA_real_,
Z_lm_K = if (!is.na(lm_sd_K) && lm_sd_K != 0) (lm_Score_K - lm_mean_K) / lm_sd_K else NA_real_,
Z_lm_r = if (!is.na(lm_sd_r) && lm_sd_r != 0) (lm_Score_r - lm_mean_r) / lm_sd_r else NA_real_,
Z_lm_AUC = if (!is.na(lm_sd_AUC) && lm_sd_AUC != 0) (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC else NA_real_
) %>% ) %>%
arrange(desc(Z_lm_L), desc(NG)) arrange(desc(Z_lm_L), desc(NG))
# Deletion data ranking and linear modeling # Deletion data ranking and linear modeling
# Initialize this variable so we can return it as NULL for reference # Initialize this variable so we can return it as NULL for reference
# We don't do the following calculations for the reference strain # We don't do the following calculations for the reference strain
@@ -408,11 +410,12 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Rank_lm_L = rank(Z_lm_adjusted_L), Rank_lm_L = rank(Z_lm_adjusted_L),
Rank_lm_K = rank(Z_lm_adjusted_K), Rank_lm_K = rank(Z_lm_adjusted_K),
Rank_lm_r = rank(Z_lm_adjusted_r), Rank_lm_r = rank(Z_lm_adjusted_r),
Rank_lm_AUC = rank(Z_lm_adjusted_AUC), Rank_lm_AUC = rank(Z_lm_adjusted_AUC)
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), # Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)), # Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), # Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), # Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
) )
# We are filtering invalid Z_lm scores so this must be in its own df # We are filtering invalid Z_lm scores so this must be in its own df
@@ -443,10 +446,10 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)), Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
# Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared, Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared,
# Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared, Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared,
# Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared, Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared,
# Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared, Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared,
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)), Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)), Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)), Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
@@ -497,7 +500,11 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope, Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared, Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
) ) %>%
select(-c(Rank_lm_lm_L, Rank_lm_lm_K, Rank_lm_lm_r, Rank_lm_lm_AUC,
Correlation_lm_L, Correlation_lm_K, Correlation_lm_r, Correlation_lm_AUC,
Correlation_lm_K_L, Correlation_lm_r_L, Correlation_lm_AUC_L,
Correlation_lm_r_K, Correlation_lm_AUC_K, Correlation_lm_AUC_r))
} }
# Create the final calculations and interactions dataframes with specific columns for csv output # Create the final calculations and interactions dataframes with specific columns for csv output
@@ -625,6 +632,23 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
) )
} }
# Add annotations if specified
if (!is.null(config$annotations)) {
for (annotation in config$annotations) {
plot <- plot +
annotate(
"text",
x = ifelse(is.null(annotation$x), 0, annotation$x),
y = ifelse(is.null(annotation$y), Inf, annotation$y),
label = annotation$label,
hjust = ifelse(is.null(annotation$hjust), 0.5, annotation$hjust),
vjust = ifelse(is.null(annotation$vjust), 1, annotation$vjust),
size = ifelse(is.null(annotation$size), 3, annotation$size),
color = ifelse(is.null(annotation$color), "black", annotation$color)
)
}
}
if (!is.null(config$grid_lines)) { if (!is.null(config$grid_lines)) {
plot <- plot + theme( plot <- plot + theme(
panel.grid.major = if (config$grid_lines$major) element_line() else element_blank(), panel.grid.major = if (config$grid_lines$major) element_line() else element_blank(),
@@ -783,6 +807,14 @@ generate_scatter_plot <- function(plot, config) {
# Extract necessary values # Extract necessary values
intercept <- config$lm_line$intercept # required intercept <- config$lm_line$intercept # required
slope <- config$lm_line$slope # required slope <- config$lm_line$slope # required
message("----- Generating lm_line -----")
message("Intercept: ", intercept)
message("Slope: ", slope)
message("Class: ", class(config$df[[config$x_var]]))
message("------------------------------")
xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]]))) xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]])))
xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]]))) xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]])))
color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue") color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue")
@@ -1183,7 +1215,7 @@ generate_rank_plot_configs <- function(df, is_lm = FALSE, filter_na = FALSE, ove
# Generate plots for each variable # Generate plots for each variable
for (variable in variables) { for (variable in variables) {
rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable) rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable) zscore_var <- if (is_lm) paste0("Z_lm_adjusted_", variable) else paste0("Avg_Zscore_adjusted_", variable)
y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable) y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
# Loop through SD bands # Loop through SD bands
@@ -1614,9 +1646,9 @@ main <- function() {
write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Generating reference interaction plots") # message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference") # reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
deletion_adjusted <- stats_na %>% # formerly X2 deletion_adjusted <- stats_na %>% # formerly X2
@@ -1647,9 +1679,9 @@ main <- function() {
write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
message("Generating deletion interaction plots") # message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion") # deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16) # generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
message("Writing enhancer/suppressor csv files") message("Writing enhancer/suppressor csv files")
interaction_threshold <- 2 # TODO add to study config? interaction_threshold <- 2 # TODO add to study config?