Refactor calculations to try and grouping issues

This commit is contained in:
2024-10-08 02:16:35 -04:00
parent aef0fba1da
commit d456bcdadd

View File

@@ -177,7 +177,7 @@ update_gene_names <- function(df, sgd_gene_list) {
}
calculate_summary_stats <- function(df, variables, group_vars) {
summary_stats <- df %>%
ss <- df %>%
group_by(across(all_of(group_vars))) %>%
summarise(
N = n(),
@@ -192,16 +192,19 @@ calculate_summary_stats <- function(df, variables, group_vars) {
),
.names = "{.fn}_{.col}"
),
sum_NG = sum(NG, na.rm = TRUE),
sum_DB = sum(DB, na.rm = TRUE),
sum_SM = sum(SM, na.rm = TRUE),
.groups = "drop"
)
# Create a cleaned version of df that doesn't overlap with summary_stats
df_cleaned <- df %>%
select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
select(-any_of(setdiff(intersect(names(df), names(ss)), group_vars)))
df_joined <- left_join(df_cleaned, summary_stats, by = group_vars)
ss_joined <- left_join(df_cleaned, ss, by = group_vars)
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
return(list(stats_csv = ss, stats_joined = ss_joined))
}
calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
@@ -217,11 +220,12 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
group_vars <- c("OrfRep", "Gene", "Drug")
}
perform_lm <- function(x, y, max_conc) {
if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) {
perform_lm <- function(response, predictor, max_conc) {
if (all(is.na(response)) || all(is.na(predictor)) ||
length(response[!is.na(response)]) == 0 || length(predictor[!is.na(predictor)]) == 0) {
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
} else {
fit <- lm(y ~ x)
fit <- lm(response ~ predictor)
return(list(
intercept = coef(fit)[1],
slope = coef(fit)[2],
@@ -247,98 +251,69 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
)
# Join WT statistics to df
df <- df %>%
left_join(wt_stats, by = c(bg_group_vars))
df <- df %>% left_join(wt_stats, by = bg_group_vars)
# Compute mean values at zero concentration
mean_zeroes <- df %>%
filter(conc_num == 0) %>%
df_mean_zeros <- df %>%
group_by(across(all_of(group_vars))) %>%
summarise(
filter(conc_num == 0) %>%
summarize(
mean_L_zero = mean(mean_L, na.rm = TRUE),
mean_K_zero = mean(mean_K, na.rm = TRUE),
mean_r_zero = mean(mean_r, na.rm = TRUE),
mean_AUC_zero = mean(mean_AUC, na.rm = TRUE),
.groups = "drop"
.groups = "drop" # Ungroup after summarizing
)
df <- df %>%
left_join(mean_zeroes, by = c(group_vars))
# Calculate Raw Shifts and Z Shifts for all rows
df <- df %>%
mutate(
Raw_Shift_L = mean_L_zero - WT_L,
Raw_Shift_K = mean_K_zero - WT_K,
Raw_Shift_r = mean_r_zero - WT_r,
Raw_Shift_AUC = mean_AUC_zero - WT_AUC,
Z_Shift_L = Raw_Shift_L / WT_sd_L,
Z_Shift_K = Raw_Shift_K / WT_sd_K,
Z_Shift_r = Raw_Shift_r / WT_sd_r,
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC
)
df <- df %>% left_join(df_mean_zeros, by = group_vars)
# Calculate per-row data
calculations <- df %>%
group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>%
mutate(
NG_sum = sum(NG, na.rm = TRUE),
DB_sum = sum(DB, na.rm = TRUE),
SM_sum = sum(SM, na.rm = TRUE),
# Expected values
Exp_L = WT_L + Raw_Shift_L,
Exp_K = WT_K + Raw_Shift_K,
Exp_r = WT_r + Raw_Shift_r,
Exp_AUC = WT_AUC + Raw_Shift_AUC,
# Deltas
Delta_L = mean_L - Exp_L,
Delta_K = mean_K - Exp_K,
Delta_r = mean_r - Exp_r,
Delta_AUC = mean_AUC - Exp_AUC,
# Adjust deltas for NG and SM
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
# Calculate Z-scores
Zscore_L = Delta_L / WT_sd_L,
Zscore_K = Delta_K / WT_sd_K,
Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC
) %>%
ungroup()
# For interaction plot error bars
delta_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>%
summarise(
mean_Delta_L = mean(Delta_L, na.rm = TRUE),
mean_Delta_K = mean(Delta_K, na.rm = TRUE),
mean_Delta_r = mean(Delta_r, na.rm = TRUE),
mean_Delta_AUC = mean(Delta_AUC, na.rm = TRUE),
sd_Delta_L = sd(Delta_L, na.rm = TRUE),
sd_Delta_K = sd(Delta_K, na.rm = TRUE),
sd_Delta_r = sd(Delta_r, na.rm = TRUE),
sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE),
.groups = "drop"
)
calculations <- calculations %>%
left_join(delta_means_sds, by = group_vars)
mutate(
Raw_Shift_L = mean_L_zero - WT_L,
Raw_Shift_K = mean_K_zero - WT_K,
Raw_Shift_r = mean_r_zero - WT_r,
Raw_Shift_AUC = mean_AUC_zero - WT_AUC,
Z_Shift_L = Raw_Shift_L / WT_sd_L,
Z_Shift_K = Raw_Shift_K / WT_sd_K,
Z_Shift_r = Raw_Shift_r / WT_sd_r,
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC,
# Expected values
Exp_L = WT_L + Raw_Shift_L,
Exp_K = WT_K + Raw_Shift_K,
Exp_r = WT_r + Raw_Shift_r,
Exp_AUC = WT_AUC + Raw_Shift_AUC,
# Deltas
Delta_L = mean_L - Exp_L,
Delta_K = mean_K - Exp_K,
Delta_r = mean_r - Exp_r,
Delta_AUC = mean_AUC - Exp_AUC,
# Adjust deltas for NG and SM
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
# Calculate Z-scores
Zscore_L = Delta_L / WT_sd_L,
Zscore_K = Delta_K / WT_sd_K,
Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC
)
# Calculate group-specific interactions
interactions <- calculations %>%
group_by(across(all_of(group_vars))) %>%
summarise(
NG_sum_int = sum(NG),
DB_sum_int = sum(DB),
SM_sum_int = sum(SM),
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
num_non_removed_concs = total_conc_num - sum(DB) - 1,
# Count QA columns, this will clobber the originals in left join but
# we're not using them after this point so it's cleaner this way
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE),
# Add background data
Raw_Shift_L = first(Raw_Shift_L),
Raw_Shift_K = first(Raw_Shift_K),
@@ -356,7 +331,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
# We sum twice but it saves on creating another block
# TODO should we use mean() here, not sure
# TODO should we use mean() here, original logic needs explanation
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
@@ -392,7 +367,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
# Summary statistics for lm scores
interactions <- interactions %>%
# group_by(across(all_of(group_vars))) %>%
group_by(across(all_of(group_vars))) %>%
mutate(
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
@@ -407,46 +382,42 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
) %>%
arrange(desc(Z_lm_L), desc(NG_sum_int))
arrange(desc(Z_lm_L), desc(NG))
# Deletion data ranking and linear modeling
# Initialize this variable so we can return it as NULL for reference
# We don't do the following calculations for the reference strain
interactions_narm <- NULL # formerly X_NArm
if (type == "deletion") {
interactions <- interactions %>%
mutate(
Avg_Zscore_L_adjusted = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
Avg_Zscore_K_adjusted = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
Avg_Zscore_r_adjusted = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
Avg_Zscore_AUC_adjusted = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
Rank_L = rank(Avg_Zscore_L_adjusted),
Rank_K = rank(Avg_Zscore_K_adjusted),
Rank_r = rank(Avg_Zscore_r_adjusted),
Rank_AUC = rank(Avg_Zscore_AUC_adjusted),
Rank_lm_L = rank(Z_lm_L_adjusted),
Rank_lm_K = rank(Z_lm_K_adjusted),
Rank_lm_r = rank(Z_lm_r_adjusted),
Rank_lm_AUC = rank(Z_lm_AUC_adjusted),
Rank_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
Rank_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc))
# Set NA values to a small value to include them in ranking
Avg_Zscore_adjusted_L = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
Avg_Zscore_adjusted_K = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
Avg_Zscore_adjusted_r = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
Avg_Zscore_adjusted_AUC = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
Z_lm_adjusted_L = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
Z_lm_adjusted_K = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
Z_lm_adjusted_r = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
Z_lm_adjusted_AUC = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
Rank_L = rank(Avg_Zscore_adjusted_L),
Rank_K = rank(Avg_Zscore_adjusted_K),
Rank_r = rank(Avg_Zscore_adjusted_r),
Rank_AUC = rank(Avg_Zscore_adjusted_AUC),
Rank_lm_L = rank(Z_lm_adjusted_L),
Rank_lm_K = rank(Z_lm_adjusted_K),
Rank_lm_r = rank(Z_lm_adjusted_r),
Rank_lm_AUC = rank(Z_lm_adjusted_AUC),
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
)
# Add overlap threshold categories based on Z-lm and Avg-Z scores
interactions <- interactions %>%
# We are filtering invalid Z_lm scores so this must be in its own df
# A left join will just reintroduce NAs by coercion
interactions_narm <- interactions %>%
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
mutate(
Overlap = case_when(
@@ -460,11 +431,32 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
TRUE ~ "No Effect"
),
Rank_lm_R_squared_L = Rank_lm_L[[1]]$r_squared,
Rank_lm_R_squared_K = Rank_lm_L[[1]]$r_squared,
Rank_lm_R_squared_r = Rank_lm_r[[1]]$r_squared,
Rank_lm_R_squared_AUC = Rank_lm_AUC[[1]]$r_squared,
Rank_L = rank(Avg_Zscore_L),
Rank_K = rank(Avg_Zscore_K),
Rank_r = rank(Avg_Zscore_r),
Rank_AUC = rank(Avg_Zscore_AUC),
Rank_lm_L = rank(Z_lm_L),
Rank_lm_K = rank(Z_lm_K),
Rank_lm_r = rank(Z_lm_r),
Rank_lm_AUC = rank(Z_lm_AUC),
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
# Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared,
# Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared,
# Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared,
# Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared,
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)),
Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept,
Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope,
Correlation_lm_R_Squared_L = Correlation_lm_L[[1]]$r_squared,
@@ -483,97 +475,77 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score,
Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept,
Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope,
Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
Correlation_lm_R_Squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score,
Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept,
Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope,
Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
Correlation_lm_R_Squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score,
Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept,
Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope,
Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
Correlation_lm_R_Squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score,
Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept,
Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope,
Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
Correlation_lm_R_Squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score,
Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept,
Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope,
Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
Correlation_lm_R_Squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score,
Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept,
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
)
}
# Create the final calculations and interactions dataframes with specific columns for csv output
# Trying to mimic original output data
df_calculations <- calculations %>%
select(all_of(c(
group_vars, # necessary for full_data left_join
"conc_num", "conc_num_factor", "conc_num_factor_factor", "N",
"mean_L", "median_L", "sd_L", "se_L",
"mean_K", "median_K", "sd_K", "se_K",
"mean_r", "median_r", "sd_r", "se_r",
"mean_AUC", "median_AUC", "sd_AUC", "se_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"WT_L", "WT_K", "WT_r", "WT_AUC",
"WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
"Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
"Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
"mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC",
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
"NG_sum", "DB_sum", "SM_sum"
))) %>%
rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
calculations_csv <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor,
N, NG, DB, SM,
mean_L, median_L, sd_L, se_L,
mean_K, median_K, sd_K, se_K,
mean_r, median_r, sd_r, se_r,
mean_AUC, median_AUC, sd_AUC, se_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
)
df_interactions <- interactions %>%
select(any_of(c(
group_vars, # necessary for full_data left_join
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
"NG_sum_int", "DB_sum_int", "SM_sum_int",
"Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC",
"Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L",
"Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K",
"Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r",
"Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC",
"Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L",
"Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L",
"Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L",
"Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K",
"Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K",
"Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r",
"Overlap"
))) %>%
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
interactions_csv <- interactions %>%
select(
all_of(group_vars),
NG, DB, SM,
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
Sum_Z_Score_L, Sum_Z_Score_K, Sum_Z_Score_r, Sum_Z_Score_AUC,
lm_Score_L, lm_Score_K, lm_Score_r, lm_Score_AUC,
R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC
)
# Avoid column collision on left join for overlapping variables
calculations_no_overlap <- calculations %>%
select(-any_of(c("DB", "NG", "SM",
# Don't need these anywhere so easier to remove
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
# we need these for the interactions but the original code has the same names in both datasets
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC"
)))
full_data <- calculations_no_overlap %>%
left_join(interactions, by = group_vars)
# Filter the calculations dataframe so we don't clobber on left join
calculations_selected <- calculations %>%
select(all_of(c(group_vars, setdiff(names(calculations), names(interactions)))))
interactions_joined <- left_join(calculations_selected, interactions, by = group_vars)
# Return final dataframes
return(list(
calculations = df_calculations,
interactions = df_interactions,
full_data = full_data
calculations_csv = calculations_csv,
interactions_csv = interactions_csv,
interactions = interactions,
interactions_joined = interactions_joined,
interactions_narm = interactions_narm
))
}
@@ -1068,10 +1040,10 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
}
for (var in names(delta_limits_map)) {
y_var_name <- paste0("Delta_", var)
y_limits <- delta_limits_map[[var]]
y_span <- y_limits[2] - y_limits[1]
y_var_name <- paste0("Delta_", var)
# Anti-filter to select out-of-bounds rows
out_of_bounds <- group_data %>%
filter(is.na(!!sym(y_var_name)) |
@@ -1095,19 +1067,17 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
next # skip plot if insufficient data is available
}
WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0)
Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2)
Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2)
R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2)
NG_value <- first(group_data_filtered$NG, default = 0)
DB_value <- first(group_data_filtered$DB, default = 0)
SM_value <- first(group_data_filtered$SM, default = 0)
NG <- first(group_data_filtered$NG)
DB <- first(group_data_filtered$DB)
SM <- first(group_data_filtered$SM)
lm_intercept_col <- paste0("lm_intercept_", var)
lm_slope_col <- paste0("lm_slope_", var)
lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0)
lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0)
lm_intercept <- first(group_data_filtered[[lm_intercept_col]])
lm_slope <- first(group_data_filtered[[lm_slope_col]])
plot_config <- list(
df = group_data_filtered,
@@ -1122,10 +1092,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
annotations = list(
list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))),
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste(" lm ZScore =", round(Z_lm_value, 2))),
# list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste(" R-squared =", round(R_squared_value, 2))),
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)),
list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)),
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG)),
list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB)),
list(x = 1, y = y_limits[1], label = paste("SM =", SM))
),
error_bar = TRUE,
error_bar_params = list(
@@ -1267,7 +1236,7 @@ generate_correlation_plot_configs <- function(df, df_reference) {
intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1]
slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1]
r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1]
r_squared <- df[[paste0("Correlation_lm_R_Squared_", rel$y, "_", rel$x)]][1]
r_squared_rounded <- round(r_squared, 4)
r_squared_label <- paste("R-squared =", r_squared_rounded)
xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]])))
@@ -1355,7 +1324,7 @@ main <- function() {
df_stats <- calculate_summary_stats( # formerly X_stats_ALL
df = df,
variables = c("L", "K", "r", "AUC", "delta_bg"),
group_vars = c("conc_num", "conc_num_factor_factor"))$df_with_stats
group_vars = c("conc_num", "conc_num_factor_factor"))$stats_joined
frequency_delta_bg_plot_configs <- list(
plots = list(
@@ -1422,11 +1391,10 @@ main <- function() {
df = df_na,
variables = c("L", "K", "r", "AUC", "delta_bg"),
group_vars = c("conc_num", "conc_num_factor_factor"))
df_na_ss <- ss$summary_stats
df_na_stats <- ss$df_with_stats # formerly X_stats_ALL
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
stats_na <- ss$stats_joined # formerly X_stats_ALL
write.csv(ss$stats_csv, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
# This can help bypass missing values ggplot warnings during testing
df_na_stats_filtered <- df_na_stats %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite))
stats_na_filtered <- stats_na %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite))
message("Calculating summary statistics excluding zero values")
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
@@ -1434,40 +1402,40 @@ main <- function() {
df = df_no_zeros,
variables = c("L", "K", "r", "AUC", "delta_bg"),
group_vars = c("conc_num", "conc_num_factor_factor")
)$df_with_stats
)$stats_joined
message("Filtering by 2SD of K")
df_na_within_2sd_k <- df_na_stats %>%
df_na_within_2sd_k <- stats_na %>%
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
df_na_outside_2sd_k <- df_na_stats %>%
df_na_outside_2sd_k <- stats_na %>%
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
message("Calculating summary statistics for L within 2SD of K")
# TODO We're omitting the original z_max calculation, not sure if needed?
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", # formerly X_stats_BY_L_within_2SD_K
group_vars = c("conc_num", "conc_num_factor_factor"))$summary_stats
write.csv(ss,
group_vars = c("conc_num", "conc_num_factor_factor"))
write.csv(ss$stats_csv,
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
row.names = FALSE)
message("Calculating summary statistics for L outside 2SD of K")
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", # formerly X_stats_BY_L_outside_2SD_K
group_vars = c("conc_num", "conc_num_factor_factor"))
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
write.csv(ss$summary_stats,
df_na_l_outside_2sd_k_stats <- ss$stats_joined
write.csv(ss$stats_csv,
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
row.names = FALSE)
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
variables = c("L", "K", "r", "AUC", "delta_bg"),
df_before = df_stats,
df_after = df_na_stats_filtered
df_after = stats_na_filtered
)
plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
variables = c("L", "K", "r", "AUC", "delta_bg"),
df_before = df_stats,
df_after = df_na_stats_filtered,
df_after = stats_na_filtered,
plot_type = "box"
)
@@ -1587,17 +1555,17 @@ main <- function() {
filter(!is.na(L))
message("Calculating background summary statistics")
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
ss <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
summary_stats_bg <- ss_bg$summary_stats
df_bg_stats <- ss_bg$df_with_stats
stats_background_csv <- ss$stats_csv
stats_background <- ss$stats_joined
write.csv(
summary_stats_bg,
stats_background_csv,
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
row.names = FALSE)
message("Setting missing reference values to the highest theoretical value at each drug conc for L")
df_reference <- df_na_stats %>% # formerly X2_RF
df_reference <- stats_na %>% # formerly X2_RF
filter(OrfRep == strain) %>%
filter(!is.na(L)) %>%
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
@@ -1609,14 +1577,14 @@ main <- function() {
ungroup()
message("Calculating reference strain summary statistics")
df_reference_summary_stats <- calculate_summary_stats( # formerly X_stats_X2_RF
stats_reference <- calculate_summary_stats( # formerly X_stats_X2_RF
df = df_reference,
variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats
)$stats_joined
# Summarise statistics for error bars
df_reference_summary_stats <- df_reference_summary_stats %>%
stats_reference <- stats_reference %>%
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
mutate(
mean_mean_L = first(mean_L),
@@ -1631,26 +1599,27 @@ main <- function() {
)
message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
df_reference_interaction_stats <- calculate_summary_stats(
stats_interactions_reference <- calculate_summary_stats(
df = df_reference,
variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats
)$stats_joined
message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
df_reference_calculations <- reference_results$calculations
df_reference_interactions_joined <- reference_results$full_data
df_reference_interactions <- reference_results$interactions
write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
reference_results <- calculate_interaction_scores(stats_interactions_reference, stats_background, "reference")
calculations_reference_csv <- reference_results$calculations_csv
interactions_reference_csv <- reference_results$interactions_csv
interactions_reference <- reference_results$interactions
interactions_reference_joined <- reference_results$interactions_joined
write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
# message("Generating reference interaction plots")
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2
deletion_adjusted <- stats_na %>% # formerly X2
filter(OrfRep != strain) %>%
filter(!is.na(L)) %>%
group_by(OrfRep, Gene, conc_num, conc_num_factor_factor) %>%
@@ -1662,96 +1631,94 @@ main <- function() {
ungroup()
message("Calculating deletion strain(s) interaction summary statistics")
df_deletion_stats <- calculate_summary_stats(
df = df_deletion,
deletion_stats <- calculate_summary_stats(
df = deletion_adjusted,
variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats
)$stats_joined
message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
df_calculations <- deletion_results$calculations
df_interactions <- deletion_results$interactions
df_interactions_joined <- deletion_results$full_data
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
deletion_results <- calculate_interaction_scores(deletion_stats, stats_background, "deletion")
calculations_csv <- deletion_results$calculations_csv
interactions_csv <- deletion_results$interactions_csv
interactions <- deletion_results$interactions
interactions_joined <- deletion_results$interactions_joined
interactions_narm <- deletion_results$interactions_narm
write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
# message("Generating deletion interaction plots")
# deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
# generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
# message("Writing enhancer/suppressor csv files")
# interaction_threshold <- 2 # TODO add to study config?
# enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
# suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
# enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
# suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
# enhancers_L <- df_interactions[enhancer_condition_L, ]
# suppressors_L <- df_interactions[suppressor_condition_L, ]
# enhancers_K <- df_interactions[enhancer_condition_K, ]
# suppressors_K <- df_interactions[suppressor_condition_K, ]
# enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
# enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
# write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
# write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
# write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
# write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
# write.csv(enhancers_and_suppressors_L,
# file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
# write.csv(enhancers_and_suppressors_K,
# file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
message("Writing enhancer/suppressor csv files")
interaction_threshold <- 2 # TODO add to study config?
enhancer_condition_L <- interactions$Avg_Zscore_L >= interaction_threshold
suppressor_condition_L <- interactions$Avg_Zscore_L <= -interaction_threshold
enhancer_condition_K <- interactions$Avg_Zscore_K >= interaction_threshold
suppressor_condition_K <- interactions$Avg_Zscore_K <= -interaction_threshold
enhancers_L <- interactions[enhancer_condition_L, ]
suppressors_L <- interactions[suppressor_condition_L, ]
enhancers_K <- interactions[enhancer_condition_K, ]
suppressors_K <- interactions[suppressor_condition_K, ]
enhancers_and_suppressors_L <- interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- interactions[enhancer_condition_K | suppressor_condition_K, ]
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
# message("Writing linear model enhancer/suppressor csv files")
# lm_interaction_threshold <- 2 # TODO add to study config?
# enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
# suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
# enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
# suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
# write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
# write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
# write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
# write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
message("Writing linear model enhancer/suppressor csv files")
lm_interaction_threshold <- 2 # TODO add to study config?
enhancers_lm_L <- interactions[interactions$Z_lm_L >= lm_interaction_threshold, ]
suppressors_lm_L <- interactions[interactions$Z_lm_L <= -lm_interaction_threshold, ]
enhancers_lm_K <- interactions[interactions$Z_lm_K >= lm_interaction_threshold, ]
suppressors_lm_K <- interactions[interactions$Z_lm_K <= -lm_interaction_threshold, ]
write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
# message("Generating rank plots")
# rank_plot_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = FALSE,
# )
# generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
# page_width = 18, page_height = 12)
message("Generating rank plots")
rank_plot_configs <- generate_rank_plot_configs(
interactions,
is_lm = FALSE,
)
generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, page_width = 18, page_height = 12)
# message("Generating ranked linear model plots")
# rank_lm_plot_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = TRUE,
# )
# generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
# page_width = 18, page_height = 12)
message("Generating ranked linear model plots")
rank_lm_plot_configs <- generate_rank_plot_configs(
interactions,
is_lm = TRUE,
)
generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, page_width = 18, page_height = 12)
# message("Generating overlapped ranked plots")
# rank_plot_filtered_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = FALSE,
# filter_na = TRUE,
# overlap_color = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
# page_width = 18, page_height = 12)
message("Generating overlapped ranked plots")
rank_plot_filtered_configs <- generate_rank_plot_configs(
interactions_narm,
is_lm = FALSE,
# filter_na = TRUE,
overlap_color = TRUE
)
generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, page_width = 18, page_height = 12)
# message("Generating overlapped ranked linear model plots")
# rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
# df_interactions,
# is_lm = TRUE,
# filter_na = TRUE,
# overlap_color = TRUE
# )
# generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
# page_width = 18, page_height = 12)
message("Generating overlapped ranked linear model plots")
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
interactions_narm,
is_lm = TRUE,
# filter_na = TRUE,
overlap_color = TRUE
)
generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, page_width = 18, page_height = 12)
message("Generating correlation curve parameter pair plots")
correlation_plot_configs <- generate_correlation_plot_configs(
df_interactions,
df_reference_interactions
interactions_narm,
interactions_reference
)
generate_and_save_plots(out_dir, "correlation_cpps", correlation_plot_configs,
page_width = 10, page_height = 7)