Correctly group linear modeling

Tento commit je obsažen v:
2024-09-17 23:25:58 -04:00
rodič 7d2473044e
revize fdcd97d7f1

Zobrazit soubor

@@ -736,7 +736,7 @@ generate_interaction_plot_configs <- function(df, variables) {
}
generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
sd_bands <- c(1, 2, 3)
configs <- list()
@@ -824,23 +824,32 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
title_suffix <- paste("Rank Avg Zscore vs lm", variable)
rectangles <- NULL
}
# Check if there is sufficient variation
if (length(unique(df_filtered[[x_var]])) < 2 || length(unique(df_filtered[[y_var]])) < 2) {
print(df_filtered %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", x_var, y_var))))
message("Not enough variation in ", x_var, " or ", y_var, " to fit linear model. Skipping.")
next
}
# Fit linear model
lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
# Fit linear model grouped by OrfRep, Gene, num
lm_results <- df_filtered %>%
group_by(OrfRep, Gene, num) %>%
do({
model <- try(lm(as.formula(paste(y_var, "~", x_var)), data = .), silent = TRUE)
if (inherits(model, "try-error")) {
# Return NA if model fails
data.frame(intercept = NA, slope = NA, r_squared = NA)
} else {
summary_model <- summary(model)
data.frame(
intercept = coef(model)[1],
slope = coef(model)[2],
r_squared = summary_model$r.squared
)
}
}) %>%
ungroup()
# # Check for perfect fit
# if (summary(lm_fit)$sigma == 0) {
# next # Skip this iteration if the fit is perfect
# }
r_squared <- summary(lm_fit)$r.squared
aggregated_lm <- lm_results %>%
summarize(
intercept = mean(intercept, na.rm = TRUE),
slope = mean(slope, na.rm = TRUE),
r_squared = mean(r_squared, na.rm = TRUE)
)
configs[[length(configs) + 1]] <- list(
df = df_filtered,
@@ -852,14 +861,14 @@ generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
list(
x = 0,
y = 0,
label = paste("R-squared =", round(r_squared, 2))
label = paste("R-squared =", round(aggregated_lm$r_squared, 2))
)
),
sd_band_values = NULL, # Not applicable
sd_band_values = NULL, # Not applicable
shape = 3,
size = 0.1,
add_smooth = TRUE,
lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
lm_line = list(intercept = aggregated_lm$intercept, slope = aggregated_lm$slope),
legend_position = "right",
color_var = "Overlap",
x_label = x_var,
@@ -1273,14 +1282,14 @@ main <- function() {
ungroup()
message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars, group_vars = c("OrfRep", "Gene", "num"))
reference_results <- calculate_interaction_scores(reference_strain, max_conc, group_vars = c("OrfRep", "Gene", "num"))
zscores_calculations_reference <- reference_results$calculations
zscores_interactions_reference <- reference_results$interactions
zscores_calculations_reference_joined <- reference_results$calculations_joined
zscores_interactions_reference_joined <- reference_results$interactions_joined
message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars, group_vars = c("OrfRep", "Gene", "num"))
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, group_vars = c("OrfRep", "Gene", "num"))
zscores_calculations <- deletion_results$calculations
zscores_interactions <- deletion_results$interactions
zscores_calculations_joined <- deletion_results$calculations_joined