Refactor calculations to try and grouping issues
This commit is contained in:
@@ -177,7 +177,7 @@ update_gene_names <- function(df, sgd_gene_list) {
|
||||
}
|
||||
|
||||
calculate_summary_stats <- function(df, variables, group_vars) {
|
||||
summary_stats <- df %>%
|
||||
ss <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
N = n(),
|
||||
@@ -192,16 +192,19 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
||||
),
|
||||
.names = "{.fn}_{.col}"
|
||||
),
|
||||
sum_NG = sum(NG, na.rm = TRUE),
|
||||
sum_DB = sum(DB, na.rm = TRUE),
|
||||
sum_SM = sum(SM, na.rm = TRUE),
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
# Create a cleaned version of df that doesn't overlap with summary_stats
|
||||
df_cleaned <- df %>%
|
||||
select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
|
||||
select(-any_of(setdiff(intersect(names(df), names(ss)), group_vars)))
|
||||
|
||||
df_joined <- left_join(df_cleaned, summary_stats, by = group_vars)
|
||||
ss_joined <- left_join(df_cleaned, ss, by = group_vars)
|
||||
|
||||
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
||||
return(list(stats_csv = ss, stats_joined = ss_joined))
|
||||
}
|
||||
|
||||
calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
|
||||
@@ -217,11 +220,12 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
group_vars <- c("OrfRep", "Gene", "Drug")
|
||||
}
|
||||
|
||||
perform_lm <- function(x, y, max_conc) {
|
||||
if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) {
|
||||
perform_lm <- function(response, predictor, max_conc) {
|
||||
if (all(is.na(response)) || all(is.na(predictor)) ||
|
||||
length(response[!is.na(response)]) == 0 || length(predictor[!is.na(predictor)]) == 0) {
|
||||
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
|
||||
} else {
|
||||
fit <- lm(y ~ x)
|
||||
fit <- lm(response ~ predictor)
|
||||
return(list(
|
||||
intercept = coef(fit)[1],
|
||||
slope = coef(fit)[2],
|
||||
@@ -247,26 +251,25 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
)
|
||||
|
||||
# Join WT statistics to df
|
||||
df <- df %>%
|
||||
left_join(wt_stats, by = c(bg_group_vars))
|
||||
df <- df %>% left_join(wt_stats, by = bg_group_vars)
|
||||
|
||||
# Compute mean values at zero concentration
|
||||
mean_zeroes <- df %>%
|
||||
filter(conc_num == 0) %>%
|
||||
df_mean_zeros <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
filter(conc_num == 0) %>%
|
||||
summarize(
|
||||
mean_L_zero = mean(mean_L, na.rm = TRUE),
|
||||
mean_K_zero = mean(mean_K, na.rm = TRUE),
|
||||
mean_r_zero = mean(mean_r, na.rm = TRUE),
|
||||
mean_AUC_zero = mean(mean_AUC, na.rm = TRUE),
|
||||
.groups = "drop"
|
||||
.groups = "drop" # Ungroup after summarizing
|
||||
)
|
||||
|
||||
df <- df %>%
|
||||
left_join(mean_zeroes, by = c(group_vars))
|
||||
df <- df %>% left_join(df_mean_zeros, by = group_vars)
|
||||
|
||||
# Calculate Raw Shifts and Z Shifts for all rows
|
||||
df <- df %>%
|
||||
# Calculate per-row data
|
||||
calculations <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
mutate(
|
||||
Raw_Shift_L = mean_L_zero - WT_L,
|
||||
Raw_Shift_K = mean_K_zero - WT_K,
|
||||
@@ -275,69 +278,41 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Z_Shift_L = Raw_Shift_L / WT_sd_L,
|
||||
Z_Shift_K = Raw_Shift_K / WT_sd_K,
|
||||
Z_Shift_r = Raw_Shift_r / WT_sd_r,
|
||||
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC
|
||||
)
|
||||
|
||||
calculations <- df %>%
|
||||
group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>%
|
||||
mutate(
|
||||
NG_sum = sum(NG, na.rm = TRUE),
|
||||
DB_sum = sum(DB, na.rm = TRUE),
|
||||
SM_sum = sum(SM, na.rm = TRUE),
|
||||
|
||||
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC,
|
||||
# Expected values
|
||||
Exp_L = WT_L + Raw_Shift_L,
|
||||
Exp_K = WT_K + Raw_Shift_K,
|
||||
Exp_r = WT_r + Raw_Shift_r,
|
||||
Exp_AUC = WT_AUC + Raw_Shift_AUC,
|
||||
|
||||
# Deltas
|
||||
Delta_L = mean_L - Exp_L,
|
||||
Delta_K = mean_K - Exp_K,
|
||||
Delta_r = mean_r - Exp_r,
|
||||
Delta_AUC = mean_AUC - Exp_AUC,
|
||||
|
||||
# Adjust deltas for NG and SM
|
||||
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
|
||||
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
|
||||
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
||||
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
||||
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
||||
|
||||
# Calculate Z-scores
|
||||
Zscore_L = Delta_L / WT_sd_L,
|
||||
Zscore_K = Delta_K / WT_sd_K,
|
||||
Zscore_r = Delta_r / WT_sd_r,
|
||||
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
# For interaction plot error bars
|
||||
delta_means_sds <- calculations %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
mean_Delta_L = mean(Delta_L, na.rm = TRUE),
|
||||
mean_Delta_K = mean(Delta_K, na.rm = TRUE),
|
||||
mean_Delta_r = mean(Delta_r, na.rm = TRUE),
|
||||
mean_Delta_AUC = mean(Delta_AUC, na.rm = TRUE),
|
||||
sd_Delta_L = sd(Delta_L, na.rm = TRUE),
|
||||
sd_Delta_K = sd(Delta_K, na.rm = TRUE),
|
||||
sd_Delta_r = sd(Delta_r, na.rm = TRUE),
|
||||
sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE),
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
calculations <- calculations %>%
|
||||
left_join(delta_means_sds, by = group_vars)
|
||||
|
||||
# Calculate group-specific interactions
|
||||
interactions <- calculations %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
NG_sum_int = sum(NG),
|
||||
DB_sum_int = sum(DB),
|
||||
SM_sum_int = sum(SM),
|
||||
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
||||
num_non_removed_concs = total_conc_num - sum(DB) - 1,
|
||||
|
||||
# Count QA columns, this will clobber the originals in left join but
|
||||
# we're not using them after this point so it's cleaner this way
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
DB = sum(DB, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE),
|
||||
|
||||
# Add background data
|
||||
Raw_Shift_L = first(Raw_Shift_L),
|
||||
@@ -356,7 +331,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
||||
|
||||
# We sum twice but it saves on creating another block
|
||||
# TODO should we use mean() here, not sure
|
||||
# TODO should we use mean() here, original logic needs explanation
|
||||
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
||||
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
||||
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
|
||||
@@ -392,7 +367,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
|
||||
# Summary statistics for lm scores
|
||||
interactions <- interactions %>%
|
||||
# group_by(across(all_of(group_vars))) %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
mutate(
|
||||
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
|
||||
lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
|
||||
@@ -407,46 +382,42 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
|
||||
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
|
||||
) %>%
|
||||
arrange(desc(Z_lm_L), desc(NG_sum_int))
|
||||
arrange(desc(Z_lm_L), desc(NG))
|
||||
|
||||
# Deletion data ranking and linear modeling
|
||||
# Initialize this variable so we can return it as NULL for reference
|
||||
# We don't do the following calculations for the reference strain
|
||||
interactions_narm <- NULL # formerly X_NArm
|
||||
|
||||
if (type == "deletion") {
|
||||
interactions <- interactions %>%
|
||||
mutate(
|
||||
Avg_Zscore_L_adjusted = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
|
||||
Avg_Zscore_K_adjusted = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
|
||||
Avg_Zscore_r_adjusted = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
|
||||
Avg_Zscore_AUC_adjusted = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
|
||||
Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
|
||||
Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
|
||||
Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
|
||||
Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
|
||||
Rank_L = rank(Avg_Zscore_L_adjusted),
|
||||
Rank_K = rank(Avg_Zscore_K_adjusted),
|
||||
Rank_r = rank(Avg_Zscore_r_adjusted),
|
||||
Rank_AUC = rank(Avg_Zscore_AUC_adjusted),
|
||||
Rank_lm_L = rank(Z_lm_L_adjusted),
|
||||
Rank_lm_K = rank(Z_lm_K_adjusted),
|
||||
Rank_lm_r = rank(Z_lm_r_adjusted),
|
||||
Rank_lm_AUC = rank(Z_lm_AUC_adjusted),
|
||||
Rank_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
|
||||
Rank_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
||||
Rank_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
||||
Rank_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
||||
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
|
||||
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
|
||||
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
|
||||
Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
|
||||
Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
|
||||
Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
|
||||
Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
|
||||
Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
|
||||
Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
|
||||
Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc))
|
||||
# Set NA values to a small value to include them in ranking
|
||||
Avg_Zscore_adjusted_L = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
|
||||
Avg_Zscore_adjusted_K = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
|
||||
Avg_Zscore_adjusted_r = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
|
||||
Avg_Zscore_adjusted_AUC = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
|
||||
Z_lm_adjusted_L = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
|
||||
Z_lm_adjusted_K = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
|
||||
Z_lm_adjusted_r = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
|
||||
Z_lm_adjusted_AUC = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
|
||||
Rank_L = rank(Avg_Zscore_adjusted_L),
|
||||
Rank_K = rank(Avg_Zscore_adjusted_K),
|
||||
Rank_r = rank(Avg_Zscore_adjusted_r),
|
||||
Rank_AUC = rank(Avg_Zscore_adjusted_AUC),
|
||||
Rank_lm_L = rank(Z_lm_adjusted_L),
|
||||
Rank_lm_K = rank(Z_lm_adjusted_K),
|
||||
Rank_lm_r = rank(Z_lm_adjusted_r),
|
||||
Rank_lm_AUC = rank(Z_lm_adjusted_AUC),
|
||||
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
|
||||
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
||||
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
||||
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
||||
)
|
||||
|
||||
# Add overlap threshold categories based on Z-lm and Avg-Z scores
|
||||
interactions <- interactions %>%
|
||||
# We are filtering invalid Z_lm scores so this must be in its own df
|
||||
# A left join will just reintroduce NAs by coercion
|
||||
interactions_narm <- interactions %>%
|
||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
||||
mutate(
|
||||
Overlap = case_when(
|
||||
@@ -460,11 +431,32 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
||||
TRUE ~ "No Effect"
|
||||
),
|
||||
|
||||
Rank_lm_R_squared_L = Rank_lm_L[[1]]$r_squared,
|
||||
Rank_lm_R_squared_K = Rank_lm_L[[1]]$r_squared,
|
||||
Rank_lm_R_squared_r = Rank_lm_r[[1]]$r_squared,
|
||||
Rank_lm_R_squared_AUC = Rank_lm_AUC[[1]]$r_squared,
|
||||
Rank_L = rank(Avg_Zscore_L),
|
||||
Rank_K = rank(Avg_Zscore_K),
|
||||
Rank_r = rank(Avg_Zscore_r),
|
||||
Rank_AUC = rank(Avg_Zscore_AUC),
|
||||
Rank_lm_L = rank(Z_lm_L),
|
||||
Rank_lm_K = rank(Z_lm_K),
|
||||
Rank_lm_r = rank(Z_lm_r),
|
||||
Rank_lm_AUC = rank(Z_lm_AUC),
|
||||
Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
|
||||
Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
|
||||
Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
|
||||
Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
|
||||
# Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared,
|
||||
# Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared,
|
||||
# Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared,
|
||||
# Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared,
|
||||
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
|
||||
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
|
||||
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
|
||||
Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
|
||||
Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
|
||||
Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
|
||||
Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
|
||||
Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
|
||||
Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
|
||||
Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)),
|
||||
Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept,
|
||||
Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope,
|
||||
Correlation_lm_R_Squared_L = Correlation_lm_L[[1]]$r_squared,
|
||||
@@ -483,97 +475,77 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
||||
Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score,
|
||||
Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept,
|
||||
Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope,
|
||||
Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
|
||||
Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score,
|
||||
Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept,
|
||||
Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope,
|
||||
Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
|
||||
Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score,
|
||||
Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept,
|
||||
Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope,
|
||||
Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
|
||||
Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score,
|
||||
Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept,
|
||||
Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope,
|
||||
Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
|
||||
Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score,
|
||||
Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept,
|
||||
Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope,
|
||||
Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
|
||||
Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score,
|
||||
Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept,
|
||||
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
|
||||
Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
|
||||
Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
|
||||
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
|
||||
)
|
||||
}
|
||||
|
||||
# Create the final calculations and interactions dataframes with specific columns for csv output
|
||||
# Trying to mimic original output data
|
||||
df_calculations <- calculations %>%
|
||||
select(all_of(c(
|
||||
group_vars, # necessary for full_data left_join
|
||||
"conc_num", "conc_num_factor", "conc_num_factor_factor", "N",
|
||||
"mean_L", "median_L", "sd_L", "se_L",
|
||||
"mean_K", "median_K", "sd_K", "se_K",
|
||||
"mean_r", "median_r", "sd_r", "se_r",
|
||||
"mean_AUC", "median_AUC", "sd_AUC", "se_AUC",
|
||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||
"WT_L", "WT_K", "WT_r", "WT_AUC",
|
||||
"WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
|
||||
"Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
|
||||
"Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
|
||||
"mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC",
|
||||
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
|
||||
"NG_sum", "DB_sum", "SM_sum"
|
||||
))) %>%
|
||||
rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
|
||||
calculations_csv <- calculations %>%
|
||||
select(
|
||||
all_of(group_vars),
|
||||
conc_num, conc_num_factor, conc_num_factor_factor,
|
||||
N, NG, DB, SM,
|
||||
mean_L, median_L, sd_L, se_L,
|
||||
mean_K, median_K, sd_K, se_K,
|
||||
mean_r, median_r, sd_r, se_r,
|
||||
mean_AUC, median_AUC, sd_AUC, se_AUC,
|
||||
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
||||
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
||||
WT_L, WT_K, WT_r, WT_AUC,
|
||||
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
|
||||
Exp_L, Exp_K, Exp_r, Exp_AUC,
|
||||
Delta_L, Delta_K, Delta_r, Delta_AUC,
|
||||
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
|
||||
)
|
||||
|
||||
df_interactions <- interactions %>%
|
||||
select(any_of(c(
|
||||
group_vars, # necessary for full_data left_join
|
||||
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
|
||||
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
||||
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
|
||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||
"lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
|
||||
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
||||
"NG_sum_int", "DB_sum_int", "SM_sum_int",
|
||||
"Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC",
|
||||
"Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L",
|
||||
"Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K",
|
||||
"Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r",
|
||||
"Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC",
|
||||
"Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L",
|
||||
"Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L",
|
||||
"Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L",
|
||||
"Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K",
|
||||
"Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K",
|
||||
"Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r",
|
||||
"Overlap"
|
||||
))) %>%
|
||||
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
|
||||
interactions_csv <- interactions %>%
|
||||
select(
|
||||
all_of(group_vars),
|
||||
NG, DB, SM,
|
||||
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
||||
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
||||
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
||||
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
||||
Sum_Z_Score_L, Sum_Z_Score_K, Sum_Z_Score_r, Sum_Z_Score_AUC,
|
||||
lm_Score_L, lm_Score_K, lm_Score_r, lm_Score_AUC,
|
||||
R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC
|
||||
)
|
||||
|
||||
# Avoid column collision on left join for overlapping variables
|
||||
calculations_no_overlap <- calculations %>%
|
||||
select(-any_of(c("DB", "NG", "SM",
|
||||
# Don't need these anywhere so easier to remove
|
||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
||||
# we need these for the interactions but the original code has the same names in both datasets
|
||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC"
|
||||
)))
|
||||
# Filter the calculations dataframe so we don't clobber on left join
|
||||
calculations_selected <- calculations %>%
|
||||
select(all_of(c(group_vars, setdiff(names(calculations), names(interactions)))))
|
||||
|
||||
full_data <- calculations_no_overlap %>%
|
||||
left_join(interactions, by = group_vars)
|
||||
interactions_joined <- left_join(calculations_selected, interactions, by = group_vars)
|
||||
|
||||
# Return final dataframes
|
||||
return(list(
|
||||
calculations = df_calculations,
|
||||
interactions = df_interactions,
|
||||
full_data = full_data
|
||||
calculations_csv = calculations_csv,
|
||||
interactions_csv = interactions_csv,
|
||||
interactions = interactions,
|
||||
interactions_joined = interactions_joined,
|
||||
interactions_narm = interactions_narm
|
||||
))
|
||||
}
|
||||
|
||||
@@ -1068,9 +1040,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
||||
}
|
||||
|
||||
for (var in names(delta_limits_map)) {
|
||||
y_var_name <- paste0("Delta_", var)
|
||||
y_limits <- delta_limits_map[[var]]
|
||||
y_span <- y_limits[2] - y_limits[1]
|
||||
y_var_name <- paste0("Delta_", var)
|
||||
|
||||
# Anti-filter to select out-of-bounds rows
|
||||
out_of_bounds <- group_data %>%
|
||||
@@ -1095,19 +1067,17 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
||||
next # skip plot if insufficient data is available
|
||||
}
|
||||
|
||||
WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0)
|
||||
Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2)
|
||||
Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2)
|
||||
R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2)
|
||||
|
||||
NG_value <- first(group_data_filtered$NG, default = 0)
|
||||
DB_value <- first(group_data_filtered$DB, default = 0)
|
||||
SM_value <- first(group_data_filtered$SM, default = 0)
|
||||
NG <- first(group_data_filtered$NG)
|
||||
DB <- first(group_data_filtered$DB)
|
||||
SM <- first(group_data_filtered$SM)
|
||||
|
||||
lm_intercept_col <- paste0("lm_intercept_", var)
|
||||
lm_slope_col <- paste0("lm_slope_", var)
|
||||
lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0)
|
||||
lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0)
|
||||
lm_intercept <- first(group_data_filtered[[lm_intercept_col]])
|
||||
lm_slope <- first(group_data_filtered[[lm_slope_col]])
|
||||
|
||||
plot_config <- list(
|
||||
df = group_data_filtered,
|
||||
@@ -1122,10 +1092,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
|
||||
annotations = list(
|
||||
list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))),
|
||||
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste(" lm ZScore =", round(Z_lm_value, 2))),
|
||||
# list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste(" R-squared =", round(R_squared_value, 2))),
|
||||
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)),
|
||||
list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)),
|
||||
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
|
||||
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG)),
|
||||
list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB)),
|
||||
list(x = 1, y = y_limits[1], label = paste("SM =", SM))
|
||||
),
|
||||
error_bar = TRUE,
|
||||
error_bar_params = list(
|
||||
@@ -1267,7 +1236,7 @@ generate_correlation_plot_configs <- function(df, df_reference) {
|
||||
|
||||
intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1]
|
||||
slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1]
|
||||
r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1]
|
||||
r_squared <- df[[paste0("Correlation_lm_R_Squared_", rel$y, "_", rel$x)]][1]
|
||||
r_squared_rounded <- round(r_squared, 4)
|
||||
r_squared_label <- paste("R-squared =", r_squared_rounded)
|
||||
xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]])))
|
||||
@@ -1355,7 +1324,7 @@ main <- function() {
|
||||
df_stats <- calculate_summary_stats( # formerly X_stats_ALL
|
||||
df = df,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))$df_with_stats
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))$stats_joined
|
||||
|
||||
frequency_delta_bg_plot_configs <- list(
|
||||
plots = list(
|
||||
@@ -1422,11 +1391,10 @@ main <- function() {
|
||||
df = df_na,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))
|
||||
df_na_ss <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats # formerly X_stats_ALL
|
||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
stats_na <- ss$stats_joined # formerly X_stats_ALL
|
||||
write.csv(ss$stats_csv, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
# This can help bypass missing values ggplot warnings during testing
|
||||
df_na_stats_filtered <- df_na_stats %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite))
|
||||
stats_na_filtered <- stats_na %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite))
|
||||
|
||||
message("Calculating summary statistics excluding zero values")
|
||||
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
||||
@@ -1434,40 +1402,40 @@ main <- function() {
|
||||
df = df_no_zeros,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
)$stats_joined
|
||||
|
||||
message("Filtering by 2SD of K")
|
||||
df_na_within_2sd_k <- df_na_stats %>%
|
||||
df_na_within_2sd_k <- stats_na %>%
|
||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||
df_na_outside_2sd_k <- df_na_stats %>%
|
||||
df_na_outside_2sd_k <- stats_na %>%
|
||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", # formerly X_stats_BY_L_within_2SD_K
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))$summary_stats
|
||||
write.csv(ss,
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))
|
||||
write.csv(ss$stats_csv,
|
||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for L outside 2SD of K")
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", # formerly X_stats_BY_L_outside_2SD_K
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))
|
||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||
write.csv(ss$summary_stats,
|
||||
df_na_l_outside_2sd_k_stats <- ss$stats_joined
|
||||
write.csv(ss$stats_csv,
|
||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
df_before = df_stats,
|
||||
df_after = df_na_stats_filtered
|
||||
df_after = stats_na_filtered
|
||||
)
|
||||
|
||||
plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
df_before = df_stats,
|
||||
df_after = df_na_stats_filtered,
|
||||
df_after = stats_na_filtered,
|
||||
plot_type = "box"
|
||||
)
|
||||
|
||||
@@ -1587,17 +1555,17 @@ main <- function() {
|
||||
filter(!is.na(L))
|
||||
|
||||
message("Calculating background summary statistics")
|
||||
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
|
||||
ss <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
|
||||
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
|
||||
summary_stats_bg <- ss_bg$summary_stats
|
||||
df_bg_stats <- ss_bg$df_with_stats
|
||||
stats_background_csv <- ss$stats_csv
|
||||
stats_background <- ss$stats_joined
|
||||
write.csv(
|
||||
summary_stats_bg,
|
||||
stats_background_csv,
|
||||
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Setting missing reference values to the highest theoretical value at each drug conc for L")
|
||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||
df_reference <- stats_na %>% # formerly X2_RF
|
||||
filter(OrfRep == strain) %>%
|
||||
filter(!is.na(L)) %>%
|
||||
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
|
||||
@@ -1609,14 +1577,14 @@ main <- function() {
|
||||
ungroup()
|
||||
|
||||
message("Calculating reference strain summary statistics")
|
||||
df_reference_summary_stats <- calculate_summary_stats( # formerly X_stats_X2_RF
|
||||
stats_reference <- calculate_summary_stats( # formerly X_stats_X2_RF
|
||||
df = df_reference,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
)$stats_joined
|
||||
|
||||
# Summarise statistics for error bars
|
||||
df_reference_summary_stats <- df_reference_summary_stats %>%
|
||||
stats_reference <- stats_reference %>%
|
||||
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
|
||||
mutate(
|
||||
mean_mean_L = first(mean_L),
|
||||
@@ -1631,26 +1599,27 @@ main <- function() {
|
||||
)
|
||||
|
||||
message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
|
||||
df_reference_interaction_stats <- calculate_summary_stats(
|
||||
stats_interactions_reference <- calculate_summary_stats(
|
||||
df = df_reference,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
)$stats_joined
|
||||
|
||||
message("Calculating reference strain interaction scores")
|
||||
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
|
||||
df_reference_calculations <- reference_results$calculations
|
||||
df_reference_interactions_joined <- reference_results$full_data
|
||||
df_reference_interactions <- reference_results$interactions
|
||||
write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||
reference_results <- calculate_interaction_scores(stats_interactions_reference, stats_background, "reference")
|
||||
calculations_reference_csv <- reference_results$calculations_csv
|
||||
interactions_reference_csv <- reference_results$interactions_csv
|
||||
interactions_reference <- reference_results$interactions
|
||||
interactions_reference_joined <- reference_results$interactions_joined
|
||||
write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||
write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||
|
||||
# message("Generating reference interaction plots")
|
||||
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
||||
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||
message("Generating reference interaction plots")
|
||||
reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference")
|
||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||
|
||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||
df_deletion <- df_na_stats %>% # formerly X2
|
||||
deletion_adjusted <- stats_na %>% # formerly X2
|
||||
filter(OrfRep != strain) %>%
|
||||
filter(!is.na(L)) %>%
|
||||
group_by(OrfRep, Gene, conc_num, conc_num_factor_factor) %>%
|
||||
@@ -1662,96 +1631,94 @@ main <- function() {
|
||||
ungroup()
|
||||
|
||||
message("Calculating deletion strain(s) interaction summary statistics")
|
||||
df_deletion_stats <- calculate_summary_stats(
|
||||
df = df_deletion,
|
||||
deletion_stats <- calculate_summary_stats(
|
||||
df = deletion_adjusted,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
)$stats_joined
|
||||
|
||||
message("Calculating deletion strain(s) interactions scores")
|
||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
|
||||
df_calculations <- deletion_results$calculations
|
||||
df_interactions <- deletion_results$interactions
|
||||
df_interactions_joined <- deletion_results$full_data
|
||||
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
||||
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||
deletion_results <- calculate_interaction_scores(deletion_stats, stats_background, "deletion")
|
||||
calculations_csv <- deletion_results$calculations_csv
|
||||
interactions_csv <- deletion_results$interactions_csv
|
||||
interactions <- deletion_results$interactions
|
||||
interactions_joined <- deletion_results$interactions_joined
|
||||
interactions_narm <- deletion_results$interactions_narm
|
||||
write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
||||
write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||
|
||||
# message("Generating deletion interaction plots")
|
||||
# deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
|
||||
# generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
|
||||
message("Generating deletion interaction plots")
|
||||
deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion")
|
||||
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
|
||||
|
||||
# message("Writing enhancer/suppressor csv files")
|
||||
# interaction_threshold <- 2 # TODO add to study config?
|
||||
# enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
|
||||
# suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
|
||||
# enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
|
||||
# suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
|
||||
# enhancers_L <- df_interactions[enhancer_condition_L, ]
|
||||
# suppressors_L <- df_interactions[suppressor_condition_L, ]
|
||||
# enhancers_K <- df_interactions[enhancer_condition_K, ]
|
||||
# suppressors_K <- df_interactions[suppressor_condition_K, ]
|
||||
# enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||
# enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||
# write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
|
||||
# write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
|
||||
# write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
|
||||
# write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
|
||||
# write.csv(enhancers_and_suppressors_L,
|
||||
# file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
|
||||
# write.csv(enhancers_and_suppressors_K,
|
||||
# file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
|
||||
message("Writing enhancer/suppressor csv files")
|
||||
interaction_threshold <- 2 # TODO add to study config?
|
||||
enhancer_condition_L <- interactions$Avg_Zscore_L >= interaction_threshold
|
||||
suppressor_condition_L <- interactions$Avg_Zscore_L <= -interaction_threshold
|
||||
enhancer_condition_K <- interactions$Avg_Zscore_K >= interaction_threshold
|
||||
suppressor_condition_K <- interactions$Avg_Zscore_K <= -interaction_threshold
|
||||
enhancers_L <- interactions[enhancer_condition_L, ]
|
||||
suppressors_L <- interactions[suppressor_condition_L, ]
|
||||
enhancers_K <- interactions[enhancer_condition_K, ]
|
||||
suppressors_K <- interactions[suppressor_condition_K, ]
|
||||
enhancers_and_suppressors_L <- interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||
enhancers_and_suppressors_K <- interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_and_suppressors_L,
|
||||
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_and_suppressors_K,
|
||||
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# message("Writing linear model enhancer/suppressor csv files")
|
||||
# lm_interaction_threshold <- 2 # TODO add to study config?
|
||||
# enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
|
||||
# suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
|
||||
# enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
|
||||
# suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
|
||||
# write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
|
||||
# write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
|
||||
# write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
|
||||
# write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
|
||||
message("Writing linear model enhancer/suppressor csv files")
|
||||
lm_interaction_threshold <- 2 # TODO add to study config?
|
||||
enhancers_lm_L <- interactions[interactions$Z_lm_L >= lm_interaction_threshold, ]
|
||||
suppressors_lm_L <- interactions[interactions$Z_lm_L <= -lm_interaction_threshold, ]
|
||||
enhancers_lm_K <- interactions[interactions$Z_lm_K >= lm_interaction_threshold, ]
|
||||
suppressors_lm_K <- interactions[interactions$Z_lm_K <= -lm_interaction_threshold, ]
|
||||
write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
|
||||
|
||||
# message("Generating rank plots")
|
||||
# rank_plot_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = FALSE,
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
message("Generating rank plots")
|
||||
rank_plot_configs <- generate_rank_plot_configs(
|
||||
interactions,
|
||||
is_lm = FALSE,
|
||||
)
|
||||
generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, page_width = 18, page_height = 12)
|
||||
|
||||
# message("Generating ranked linear model plots")
|
||||
# rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = TRUE,
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
message("Generating ranked linear model plots")
|
||||
rank_lm_plot_configs <- generate_rank_plot_configs(
|
||||
interactions,
|
||||
is_lm = TRUE,
|
||||
)
|
||||
generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, page_width = 18, page_height = 12)
|
||||
|
||||
# message("Generating overlapped ranked plots")
|
||||
# rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = FALSE,
|
||||
message("Generating overlapped ranked plots")
|
||||
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
||||
interactions_narm,
|
||||
is_lm = FALSE,
|
||||
# filter_na = TRUE,
|
||||
# overlap_color = TRUE
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
overlap_color = TRUE
|
||||
)
|
||||
generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, page_width = 18, page_height = 12)
|
||||
|
||||
# message("Generating overlapped ranked linear model plots")
|
||||
# rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
||||
# df_interactions,
|
||||
# is_lm = TRUE,
|
||||
message("Generating overlapped ranked linear model plots")
|
||||
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
||||
interactions_narm,
|
||||
is_lm = TRUE,
|
||||
# filter_na = TRUE,
|
||||
# overlap_color = TRUE
|
||||
# )
|
||||
# generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
|
||||
# page_width = 18, page_height = 12)
|
||||
overlap_color = TRUE
|
||||
)
|
||||
generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, page_width = 18, page_height = 12)
|
||||
|
||||
message("Generating correlation curve parameter pair plots")
|
||||
correlation_plot_configs <- generate_correlation_plot_configs(
|
||||
df_interactions,
|
||||
df_reference_interactions
|
||||
interactions_narm,
|
||||
interactions_reference
|
||||
)
|
||||
generate_and_save_plots(out_dir, "correlation_cpps", correlation_plot_configs,
|
||||
page_width = 10, page_height = 7)
|
||||
|
||||
Reference in New Issue
Block a user