Calculate ranks step-wise
This commit is contained in:
@@ -222,7 +222,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
|
|
||||||
perform_lm <- function(response, predictor, max_conc) {
|
perform_lm <- function(response, predictor, max_conc) {
|
||||||
if (all(is.na(response)) || all(is.na(predictor)) ||
|
if (all(is.na(response)) || all(is.na(predictor)) ||
|
||||||
length(response[!is.na(response)]) == 0 || length(predictor[!is.na(predictor)]) == 0) {
|
length(response[!is.na(response)]) < 2 || length(predictor[!is.na(predictor)]) < 2) {
|
||||||
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
|
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
|
||||||
} else {
|
} else {
|
||||||
fit <- lm(response ~ predictor)
|
fit <- lm(response ~ predictor)
|
||||||
@@ -337,52 +337,54 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
|
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
|
||||||
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1),
|
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1),
|
||||||
|
|
||||||
# Perform gene-gene linear modeling
|
# Perform per-gene linear modeling
|
||||||
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
|
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
|
||||||
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
|
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
|
||||||
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
|
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
|
||||||
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
|
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
|
||||||
|
|
||||||
# Extract coefficients and statistics for each model
|
|
||||||
lm_intercept_L = lm_L[[1]]$intercept,
|
|
||||||
lm_slope_L = lm_L[[1]]$slope,
|
|
||||||
R_Squared_L = lm_L[[1]]$r_squared,
|
|
||||||
lm_Score_L = lm_L[[1]]$score,
|
|
||||||
lm_intercept_K = lm_K[[1]]$intercept,
|
|
||||||
lm_slope_K = lm_K[[1]]$slope,
|
|
||||||
R_Squared_K = lm_K[[1]]$r_squared,
|
|
||||||
lm_Score_K = lm_K[[1]]$score,
|
|
||||||
lm_intercept_r = lm_r[[1]]$intercept,
|
|
||||||
lm_slope_r = lm_r[[1]]$slope,
|
|
||||||
R_Squared_r = lm_r[[1]]$r_squared,
|
|
||||||
lm_Score_r = lm_r[[1]]$score,
|
|
||||||
lm_intercept_AUC = lm_AUC[[1]]$intercept,
|
|
||||||
lm_slope_AUC = lm_AUC[[1]]$slope,
|
|
||||||
R_Squared_AUC = lm_AUC[[1]]$r_squared,
|
|
||||||
lm_Score_AUC = lm_AUC[[1]]$score,
|
|
||||||
|
|
||||||
.groups = "drop"
|
.groups = "drop"
|
||||||
) %>%
|
) %>%
|
||||||
select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we have coefficients
|
|
||||||
|
|
||||||
# Summary statistics for lm scores
|
|
||||||
interactions <- interactions %>%
|
|
||||||
group_by(across(all_of(group_vars))) %>%
|
|
||||||
mutate(
|
mutate(
|
||||||
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
|
lm_intercept_L = map_dbl(lm_L, "intercept"),
|
||||||
lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
|
lm_slope_L = map_dbl(lm_L, "slope"),
|
||||||
lm_mean_K = mean(lm_Score_K, na.rm = TRUE),
|
R_Squared_L = map_dbl(lm_L, "r_squared"),
|
||||||
lm_sd_K = sd(lm_Score_K, na.rm = TRUE),
|
lm_Score_L = map_dbl(lm_L, "score"),
|
||||||
lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
|
|
||||||
lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
|
lm_intercept_K = map_dbl(lm_K, "intercept"),
|
||||||
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
lm_slope_K = map_dbl(lm_K, "slope"),
|
||||||
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
|
R_Squared_K = map_dbl(lm_K, "r_squared"),
|
||||||
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
|
lm_Score_K = map_dbl(lm_K, "score"),
|
||||||
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
|
|
||||||
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
|
lm_intercept_r = map_dbl(lm_r, "intercept"),
|
||||||
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
|
lm_slope_r = map_dbl(lm_r, "slope"),
|
||||||
|
R_Squared_r = map_dbl(lm_r, "r_squared"),
|
||||||
|
lm_Score_r = map_dbl(lm_r, "score"),
|
||||||
|
|
||||||
|
lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
|
||||||
|
lm_slope_AUC = map_dbl(lm_AUC, "slope"),
|
||||||
|
R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
|
||||||
|
lm_Score_AUC = map_dbl(lm_AUC, "score")
|
||||||
|
) %>%
|
||||||
|
select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we now have coefficients
|
||||||
|
|
||||||
|
lm_mean_L <- mean(interactions$lm_Score_L, na.rm = TRUE)
|
||||||
|
lm_sd_L <- sd(interactions$lm_Score_L, na.rm = TRUE)
|
||||||
|
lm_mean_K <- mean(interactions$lm_Score_K, na.rm = TRUE)
|
||||||
|
lm_sd_K <- sd(interactions$lm_Score_K, na.rm = TRUE)
|
||||||
|
lm_mean_r <- mean(interactions$lm_Score_r, na.rm = TRUE)
|
||||||
|
lm_sd_r <- sd(interactions$lm_Score_r, na.rm = TRUE)
|
||||||
|
lm_mean_AUC <- mean(interactions$lm_Score_AUC, na.rm = TRUE)
|
||||||
|
lm_sd_AUC <- sd(interactions$lm_Score_AUC, na.rm = TRUE)
|
||||||
|
|
||||||
|
interactions <- interactions %>%
|
||||||
|
mutate(
|
||||||
|
Z_lm_L = if (!is.na(lm_sd_L) && lm_sd_L != 0) (lm_Score_L - lm_mean_L) / lm_sd_L else NA_real_,
|
||||||
|
Z_lm_K = if (!is.na(lm_sd_K) && lm_sd_K != 0) (lm_Score_K - lm_mean_K) / lm_sd_K else NA_real_,
|
||||||
|
Z_lm_r = if (!is.na(lm_sd_r) && lm_sd_r != 0) (lm_Score_r - lm_mean_r) / lm_sd_r else NA_real_,
|
||||||
|
Z_lm_AUC = if (!is.na(lm_sd_AUC) && lm_sd_AUC != 0) (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC else NA_real_
|
||||||
) %>%
|
) %>%
|
||||||
arrange(desc(Z_lm_L), desc(NG))
|
arrange(desc(Z_lm_L), desc(NG))
|
||||||
|
|
||||||
|
|
||||||
# Deletion data ranking and linear modeling
|
# Deletion data ranking and linear modeling
|
||||||
# Initialize this variable so we can return it as NULL for reference
|
# Initialize this variable so we can return it as NULL for reference
|
||||||
@@ -408,11 +410,12 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Rank_lm_L = rank(Z_lm_adjusted_L),
|
Rank_lm_L = rank(Z_lm_adjusted_L),
|
||||||
Rank_lm_K = rank(Z_lm_adjusted_K),
|
Rank_lm_K = rank(Z_lm_adjusted_K),
|
||||||
Rank_lm_r = rank(Z_lm_adjusted_r),
|
Rank_lm_r = rank(Z_lm_adjusted_r),
|
||||||
Rank_lm_AUC = rank(Z_lm_adjusted_AUC),
|
Rank_lm_AUC = rank(Z_lm_adjusted_AUC)
|
||||||
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
|
# Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
|
||||||
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
# Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
||||||
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
# Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
||||||
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
# Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We are filtering invalid Z_lm scores so this must be in its own df
|
# We are filtering invalid Z_lm scores so this must be in its own df
|
||||||
@@ -443,10 +446,10 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
||||||
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
||||||
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
||||||
# Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared,
|
Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared,
|
||||||
# Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared,
|
Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared,
|
||||||
# Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared,
|
Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared,
|
||||||
# Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared,
|
Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared,
|
||||||
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
|
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
|
||||||
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
|
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
|
||||||
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
|
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
|
||||||
@@ -497,7 +500,11 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
|
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
|
||||||
Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
|
Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
|
||||||
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
|
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
|
||||||
)
|
) %>%
|
||||||
|
select(-c(Rank_lm_lm_L, Rank_lm_lm_K, Rank_lm_lm_r, Rank_lm_lm_AUC,
|
||||||
|
Correlation_lm_L, Correlation_lm_K, Correlation_lm_r, Correlation_lm_AUC,
|
||||||
|
Correlation_lm_K_L, Correlation_lm_r_L, Correlation_lm_AUC_L,
|
||||||
|
Correlation_lm_r_K, Correlation_lm_AUC_K, Correlation_lm_AUC_r))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create the final calculations and interactions dataframes with specific columns for csv output
|
# Create the final calculations and interactions dataframes with specific columns for csv output
|
||||||
@@ -625,6 +632,23 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add annotations if specified
|
||||||
|
if (!is.null(config$annotations)) {
|
||||||
|
for (annotation in config$annotations) {
|
||||||
|
plot <- plot +
|
||||||
|
annotate(
|
||||||
|
"text",
|
||||||
|
x = ifelse(is.null(annotation$x), 0, annotation$x),
|
||||||
|
y = ifelse(is.null(annotation$y), Inf, annotation$y),
|
||||||
|
label = annotation$label,
|
||||||
|
hjust = ifelse(is.null(annotation$hjust), 0.5, annotation$hjust),
|
||||||
|
vjust = ifelse(is.null(annotation$vjust), 1, annotation$vjust),
|
||||||
|
size = ifelse(is.null(annotation$size), 3, annotation$size),
|
||||||
|
color = ifelse(is.null(annotation$color), "black", annotation$color)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!is.null(config$grid_lines)) {
|
if (!is.null(config$grid_lines)) {
|
||||||
plot <- plot + theme(
|
plot <- plot + theme(
|
||||||
panel.grid.major = if (config$grid_lines$major) element_line() else element_blank(),
|
panel.grid.major = if (config$grid_lines$major) element_line() else element_blank(),
|
||||||
@@ -783,6 +807,14 @@ generate_scatter_plot <- function(plot, config) {
|
|||||||
# Extract necessary values
|
# Extract necessary values
|
||||||
intercept <- config$lm_line$intercept # required
|
intercept <- config$lm_line$intercept # required
|
||||||
slope <- config$lm_line$slope # required
|
slope <- config$lm_line$slope # required
|
||||||
|
|
||||||
|
message("----- Generating lm_line -----")
|
||||||
|
message("Intercept: ", intercept)
|
||||||
|
message("Slope: ", slope)
|
||||||
|
message("Class: ", class(config$df[[config$x_var]]))
|
||||||
|
message("------------------------------")
|
||||||
|
|
||||||
|
|
||||||
xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]])))
|
xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]])))
|
||||||
xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]])))
|
xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]])))
|
||||||
color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue")
|
color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue")
|
||||||
@@ -1078,7 +1110,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
|||||||
lm_slope_col <- paste0("lm_slope_", var)
|
lm_slope_col <- paste0("lm_slope_", var)
|
||||||
lm_intercept <- first(group_data_filtered[[lm_intercept_col]])
|
lm_intercept <- first(group_data_filtered[[lm_intercept_col]])
|
||||||
lm_slope <- first(group_data_filtered[[lm_slope_col]])
|
lm_slope <- first(group_data_filtered[[lm_slope_col]])
|
||||||
|
|
||||||
plot_config <- list(
|
plot_config <- list(
|
||||||
df = group_data_filtered,
|
df = group_data_filtered,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
@@ -1183,7 +1215,7 @@ generate_rank_plot_configs <- function(df, is_lm = FALSE, filter_na = FALSE, ove
|
|||||||
# Generate plots for each variable
|
# Generate plots for each variable
|
||||||
for (variable in variables) {
|
for (variable in variables) {
|
||||||
rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
|
rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
|
||||||
zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable)
|
zscore_var <- if (is_lm) paste0("Z_lm_adjusted_", variable) else paste0("Avg_Zscore_adjusted_", variable)
|
||||||
y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
|
y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
|
||||||
|
|
||||||
# Loop through SD bands
|
# Loop through SD bands
|
||||||
@@ -1614,9 +1646,9 @@ main <- function() {
|
|||||||
write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||||
write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Generating reference interaction plots")
|
# message("Generating reference interaction plots")
|
||||||
reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference")
|
# reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference")
|
||||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||||
|
|
||||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||||
deletion_adjusted <- stats_na %>% # formerly X2
|
deletion_adjusted <- stats_na %>% # formerly X2
|
||||||
@@ -1647,9 +1679,9 @@ main <- function() {
|
|||||||
write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
||||||
write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Generating deletion interaction plots")
|
# message("Generating deletion interaction plots")
|
||||||
deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion")
|
# deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion")
|
||||||
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
|
# generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
|
||||||
|
|
||||||
message("Writing enhancer/suppressor csv files")
|
message("Writing enhancer/suppressor csv files")
|
||||||
interaction_threshold <- 2 # TODO add to study config?
|
interaction_threshold <- 2 # TODO add to study config?
|
||||||
|
|||||||
Reference in New Issue
Block a user