Correctly group linear modeling

This commit is contained in:
2024-09-17 23:25:58 -04:00
parent 7d2473044e
commit fdcd97d7f1

View File

@@ -736,7 +736,7 @@ generate_interaction_plot_configs <- function(df, variables) {
} }
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) { generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
sd_bands <- c(1, 2, 3) sd_bands <- c(1, 2, 3)
configs <- list() configs <- list()
@@ -824,23 +824,32 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
title_suffix <- paste("Rank Avg Zscore vs lm", variable) title_suffix <- paste("Rank Avg Zscore vs lm", variable)
rectangles <- NULL rectangles <- NULL
} }
# Check if there is sufficient variation
if (length(unique(df_filtered[[x_var]])) < 2 || length(unique(df_filtered[[y_var]])) < 2) {
print(df_filtered %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", x_var, y_var))))
message("Not enough variation in ", x_var, " or ", y_var, " to fit linear model. Skipping.")
next
}
# Fit linear model # Fit linear model grouped by OrfRep, Gene, num
lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered) lm_results <- df_filtered %>%
group_by(OrfRep, Gene, num) %>%
do({
model <- try(lm(as.formula(paste(y_var, "~", x_var)), data = .), silent = TRUE)
if (inherits(model, "try-error")) {
# Return NA if model fails
data.frame(intercept = NA, slope = NA, r_squared = NA)
} else {
summary_model <- summary(model)
data.frame(
intercept = coef(model)[1],
slope = coef(model)[2],
r_squared = summary_model$r.squared
)
}
}) %>%
ungroup()
# # Check for perfect fit aggregated_lm <- lm_results %>%
# if (summary(lm_fit)$sigma == 0) { summarize(
# next # Skip this iteration if the fit is perfect intercept = mean(intercept, na.rm = TRUE),
# } slope = mean(slope, na.rm = TRUE),
r_squared = mean(r_squared, na.rm = TRUE)
r_squared <- summary(lm_fit)$r.squared )
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df_filtered, df = df_filtered,
@@ -852,14 +861,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
list( list(
x = 0, x = 0,
y = 0, y = 0,
label = paste("R-squared =", round(r_squared, 2)) label = paste("R-squared =", round(aggregated_lm$r_squared, 2))
) )
), ),
sd_band_values = NULL, # Not applicable sd_band_values = NULL, # Not applicable
shape = 3, shape = 3,
size = 0.1, size = 0.1,
add_smooth = TRUE, add_smooth = TRUE,
lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]), lm_line = list(intercept = aggregated_lm$intercept, slope = aggregated_lm$slope),
legend_position = "right", legend_position = "right",
color_var = "Overlap", color_var = "Overlap",
x_label = x_var, x_label = x_var,
@@ -1273,14 +1282,14 @@ main <- function() {
ungroup() ungroup()
message("Calculating reference strain interaction scores") message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars, group_vars = c("OrfRep", "Gene", "num")) reference_results <- calculate_interaction_scores(reference_strain, max_conc, group_vars = c("OrfRep", "Gene", "num"))
zscores_calculations_reference <- reference_results$calculations zscores_calculations_reference <- reference_results$calculations
zscores_interactions_reference <- reference_results$interactions zscores_interactions_reference <- reference_results$interactions
zscores_calculations_reference_joined <- reference_results$calculations_joined zscores_calculations_reference_joined <- reference_results$calculations_joined
zscores_interactions_reference_joined <- reference_results$interactions_joined zscores_interactions_reference_joined <- reference_results$interactions_joined
message("Calculating deletion strain(s) interactions scores") message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars, group_vars = c("OrfRep", "Gene", "num")) deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, group_vars = c("OrfRep", "Gene", "num"))
zscores_calculations <- deletion_results$calculations zscores_calculations <- deletion_results$calculations
zscores_interactions <- deletion_results$interactions zscores_interactions <- deletion_results$interactions
zscores_calculations_joined <- deletion_results$calculations_joined zscores_calculations_joined <- deletion_results$calculations_joined