Create and join dataframes in calculate_summary_stats
This commit is contained in:
@@ -112,21 +112,12 @@ scale_colour_publication <- function(...) {
|
||||
load_and_process_data <- function(easy_results_file, sd = 3) {
|
||||
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||
|
||||
# Clean and convert columns to numeric where appropriate
|
||||
df <- df %>%
|
||||
filter(!(.[[1]] %in% c("", "Scan"))) %>%
|
||||
filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
|
||||
mutate(
|
||||
Col = as.numeric(Col),
|
||||
Row = as.numeric(Row),
|
||||
num = as.numeric(Num.),
|
||||
L = as.numeric(l),
|
||||
K = as.numeric(K),
|
||||
r = as.numeric(r),
|
||||
scan = as.numeric(Scan),
|
||||
AUC = as.numeric(AUC96),
|
||||
last_bg = as.numeric(LstBackgrd),
|
||||
first_bg = as.numeric(X1stBackgrd),
|
||||
across(c(Col, Row, Num.), as.numeric),
|
||||
across(c(L, K, r, scan, AUC96, LstBackgrd, X1stBackgrd), as.numeric),
|
||||
delta_bg = last_bg - first_bg,
|
||||
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
|
||||
NG = if_else(L == 0 & !is.na(L), 1, 0),
|
||||
@@ -134,11 +125,13 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
|
||||
SM = 0,
|
||||
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
|
||||
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
||||
conc_num_factor = as.numeric(as.factor(conc_num)) - 1)
|
||||
conc_num_factor = as.numeric(as.factor(conc_num)) - 1
|
||||
)
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
|
||||
# Update Gene names using the SGD gene list
|
||||
update_gene_names <- function(df, sgd_gene_list) {
|
||||
# Load SGD gene list
|
||||
@@ -189,6 +182,7 @@ process_strains <- function(df) {
|
||||
|
||||
# Calculate summary statistics for all variables
|
||||
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
||||
# Generate summary statistics
|
||||
summary_stats <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
reframe(across(all_of(variables), list(
|
||||
@@ -198,13 +192,20 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Return NA if all values are NA
|
||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Return NA if all values are NA
|
||||
sd = ~sd(., na.rm = TRUE), # Standard deviation ignoring NAs
|
||||
se = ~ifelse(.N > 1, sd(., na.rm = TRUE) / sqrt(.N - 1), NA), # Standard Error using precomputed N
|
||||
z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA, (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
|
||||
se = ~ifelse(N > 1, sd(., na.rm = TRUE) / sqrt(N - 1), NA) # Standard Error using precomputed N
|
||||
# TODO: not in original stats but better to do here than in calculate_interactions?
|
||||
# z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
|
||||
# (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
|
||||
), .names = "{.fn}_{.col}"))
|
||||
|
||||
return(summary_stats)
|
||||
# Join the summary stats back to the original dataframe
|
||||
df_with_stats <- left_join(df, summary_stats, by = group_vars)
|
||||
|
||||
# Return both the summary stats and the updated dataframe
|
||||
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
||||
}
|
||||
|
||||
|
||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||
|
||||
# Calculate total concentration variables
|
||||
@@ -215,7 +216,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
||||
print("Calculating background means")
|
||||
bg_L <- df %>% filter(conc_num_factor == 0) %>% pull(mean_L) %>% first()
|
||||
bg_K <- df %>% filter(conc_num_factor == 0) %>% pull(mean_K) %>% first()
|
||||
bg_AUC <- df %>% filter(conc_num_factor == 0) %>% pull(mean_r) %>% first()
|
||||
bg_r <- df %>% filter(conc_num_factor == 0) %>% pull(mean_r) %>% first()
|
||||
bg_AUC <- df %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) %>% first()
|
||||
|
||||
bg_sd_L <- df %>% filter(conc_num_factor == 0) %>% pull(sd_L) %>% first()
|
||||
@@ -223,57 +224,43 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
||||
bg_sd_r <- df %>% filter(conc_num_factor == 0) %>% pull(sd_r) %>% first()
|
||||
bg_sd_AUC <- df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
|
||||
|
||||
# Calculate summary statistics and shifts
|
||||
# Calculate interaction scores
|
||||
print("Calculating interaction scores part 1")
|
||||
interaction_scores <- df %>%
|
||||
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
|
||||
summarise(
|
||||
NG = sum(NG),
|
||||
DB = sum(DB),
|
||||
SM = sum(SM)
|
||||
mutate(
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
DB = sum(DB, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE)
|
||||
) %>%
|
||||
summarise(across(all_of(variables), list(
|
||||
mutate(across(all_of(variables), list(
|
||||
N = ~sum(!is.na(.)), # Count of non-NA values
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(N - 1)
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(N - 1), # Standard error calculation
|
||||
Raw_Shift = ~mean(., na.rm = TRUE) - case_when(
|
||||
cur_column() == "L" ~ bg_L,
|
||||
cur_column() == "K" ~ bg_K,
|
||||
cur_column() == "r" ~ bg_r,
|
||||
cur_column() == "AUC" ~ bg_AUC
|
||||
),
|
||||
Z_Shift = ~ Raw_Shift / case_when(
|
||||
cur_column() == "L" ~ bg_sd_L,
|
||||
cur_column() == "K" ~ bg_sd_K,
|
||||
cur_column() == "r" ~ bg_sd_r,
|
||||
cur_column() == "AUC" ~ bg_sd_AUC
|
||||
),
|
||||
WT = ~mean(., na.rm = TRUE),
|
||||
WT_sd = ~sd(., na.rm = TRUE),
|
||||
Exp = ~ WT + Raw_Shift,
|
||||
Delta = ~ WT - Exp,
|
||||
Delta = if_else(NG == 1, mean - WT, Delta)
|
||||
), .names = "{.fn}_{.col}")) %>%
|
||||
summarise(
|
||||
Raw_Shift_L = mean_L[[1]] - bg_L,
|
||||
Raw_Shift_K = mean_K[[1]] - bg_K,
|
||||
Raw_Shift_r = mean_r[[1]] - bg_r,
|
||||
Raw_Shift_AUC = mean_AUC[[1]] - bg_AUC,
|
||||
Z_Shift_L = Raw_Shift_L[[1]] / bg_sd_L,
|
||||
Z_Shift_K = Raw_Shift_K[[1]] / bg_sd_K,
|
||||
Z_Shift_r = Raw_Shift_r[[1]] / bg_sd_r,
|
||||
Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd_AUC,
|
||||
WT_L = mean_L,
|
||||
WT_K = mean_K,
|
||||
WT_r = mean_r,
|
||||
WT_AUC = mean_AUC,
|
||||
WT_sd_L = sd_L,
|
||||
WT_sd_K = sd_K,
|
||||
WT_sd_r = sd_r,
|
||||
WT_sd_AUC = sd_AUC,
|
||||
Exp_L = WT_L + Raw_Shift_L,
|
||||
Exp_K = WT_K + Raw_Shift_K,
|
||||
Exp_r = WT_r + Raw_Shift_r,
|
||||
Exp_AUC = WT_AUC + Raw_Shift_AUC,
|
||||
Delta_L = mean_L - Exp_L,
|
||||
Delta_K = mean_K - Exp_K,
|
||||
Delta_r = mean_r - Exp_r,
|
||||
Delta_AUC = mean_AUC - Exp_AUC,
|
||||
Delta_L = ifelse(NG == 1, mean_L - WT_L, Delta_L), # disregard shift for no growth values in Z score calculation
|
||||
Delta_L = ifelse(SM == 1, mean_L - WT_L, Delta_L), # disregard shift for set to max values in Z score calculation
|
||||
Delta_K = ifelse(NG == 1, mean_K - WT_K, Delta_K),
|
||||
Delta_r = ifelse(NG == 1, mean_r - WT_r, Delta_r),
|
||||
Delta_AUC = ifelse(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
||||
Zscore_L = Delta_L / WT_sd_L,
|
||||
Zscore_K = Delta_K / WT_sd_K,
|
||||
Zscore_r = Delta_r / WT_sd_r,
|
||||
Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
||||
mutate(
|
||||
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) # disregard shift for set to max values in Z score calculation
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
@@ -281,52 +268,29 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
||||
print("Calculating interaction scores part 2")
|
||||
interaction_scores_all <- interaction_scores %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
lm_L = lm(Delta_L ~ conc_num_factor),
|
||||
lm_K = lm(Delta_K ~ conc_num_factor),
|
||||
lm_r = lm(Delta_r ~ conc_num_factor),
|
||||
lm_AUC = lm(Delta_AUC ~ conc_num_factor),
|
||||
lm_score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
|
||||
lm_score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
|
||||
lm_score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
|
||||
lm_score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
|
||||
lm_sd_L = sd(lm_score_L),
|
||||
lm_sd_K = sd(lm_score_K),
|
||||
lm_sd_r = sd(lm_score_r),
|
||||
lm_sd_AUC = sd(lm_score_AUC),
|
||||
lm_mean_L = mean(lm_score_L),
|
||||
lm_mean_K = mean(lm_score_K),
|
||||
lm_mean_r = mean(lm_score_r),
|
||||
lm_mean_AUC = mean(lm_score_AUC),
|
||||
Z_lm_L = (lm_score_L - lm_mean_L) / lm_sd_L,
|
||||
Z_lm_K = (lm_score_K - lm_mean_K) / lm_sd_K,
|
||||
Z_lm_r = (lm_score_r - lm_mean_r) / lm_sd_r,
|
||||
Z_lm_AUC = (lm_score_AUC - lm_mean_AUC) / lm_sd_AUC,
|
||||
r_squared_L = summary(lm_L)$r.squared,
|
||||
r_squared_K = summary(lm_K)$r.squared,
|
||||
r_squared_r = summary(lm_r)$r.squared,
|
||||
r_squared_AUC = summary(lm_AUC)$r.squared,
|
||||
Sum_Zscore_L = sum(Zscore_L),
|
||||
mutate(across(all_of(variables), list(
|
||||
lm_score = ~ max_conc * lm(Delta ~ conc_num_factor)$coefficients[2] + lm(Delta ~ conc_num_factor)$coefficients[1],
|
||||
lm_sd = ~ sd(lm_score),
|
||||
lm_mean = ~ mean(lm_score),
|
||||
Z_lm = ~ (lm_score - lm_mean) / lm_sd,
|
||||
Sum_Zscore = ~ sum(Zscore, na.rm = TRUE)
|
||||
), .names = "{.fn}_{.col}")) %>%
|
||||
mutate(
|
||||
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
|
||||
Sum_Zscore_K = sum(Zscore_K),
|
||||
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
|
||||
Sum_Zscore_r = sum(Zscore_r),
|
||||
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
|
||||
Sum_Zscore_AUC = sum(Zscore_AUC),
|
||||
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
|
||||
NG = sum(NG),
|
||||
DB = sum(DB),
|
||||
SM = sum(SM)
|
||||
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1)
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
interaction_scores_all <- interactions_scores_all %>%
|
||||
interaction_scores_all <- interaction_scores_all %>%
|
||||
arrange(desc(Z_lm_L)) %>%
|
||||
arrange(desc(NG))
|
||||
|
||||
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
|
||||
}
|
||||
|
||||
|
||||
interaction_plot_configs <- function(df, variable) {
|
||||
ylim_vals <- switch(variable,
|
||||
"L" = c(-65, 65),
|
||||
@@ -518,40 +482,19 @@ generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
|
||||
}
|
||||
|
||||
# Adjust missing values and calculate ranks
|
||||
adjust_missing_and_rank <- function(df) {
|
||||
adjust_missing_and_rank <- function(df, variables) {
|
||||
|
||||
df <- df %>%
|
||||
mutate(
|
||||
Avg_Zscore_L = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
|
||||
Avg_Zscore_K = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
|
||||
Avg_Zscore_r = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
|
||||
Avg_Zscore_AUC = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
|
||||
Z_lm_L = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
|
||||
Z_lm_K = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
|
||||
Z_lm_r = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
|
||||
Z_lm_AUC = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
|
||||
L_Rank = rank(Avg_Zscore_L),
|
||||
K_Rank = rank(Avg_Zscore_K),
|
||||
r_Rank = rank(Avg_Zscore_r),
|
||||
AUC_Rank = rank(Avg_Zscore_AUC),
|
||||
L_Rank_lm = rank(Z_lm_L),
|
||||
K_Rank_lm = rank(Z_lm_K),
|
||||
r_Rank_lm = rank(Z_lm_r),
|
||||
AUC_Rank_lm = rank(Z_lm_AUC)
|
||||
)
|
||||
mutate(across(all_of(variables), list(
|
||||
Avg_Zscore = if_else(is.na(.Avg_Zscore), 0.001, .Avg_Zscore),
|
||||
Z_lm = if_else(is.na(.Z_lm), 0.001, .Z_lm),
|
||||
Rank = rank(.Avg_Zscore),
|
||||
Rank_lm = rank(.Z_lm),
|
||||
), "{.fn}_{.col}"))
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
# Function to create and save all ranked plots
|
||||
create_ranked_plots <- function(df, output_dir) {
|
||||
df_adjusted <- adjust_missing_and_rank(df)
|
||||
|
||||
# Generate and save ranked plots
|
||||
generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots")
|
||||
generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm")
|
||||
generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_naRM")
|
||||
generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm_naRM")
|
||||
}
|
||||
|
||||
generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
|
||||
x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
|
||||
@@ -590,13 +533,14 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num",
|
||||
}
|
||||
|
||||
# Function to generate rank plots for the provided dataframe
|
||||
generate_rank_plots <- function(df, output_dir) {
|
||||
# Adjust missing values and calculate ranks
|
||||
df_adjusted <- adjust_missing_and_rank(df)
|
||||
create_ranked_plots <- function(df, output_dir) {
|
||||
|
||||
# List of variables for which we need to generate rank plots
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
|
||||
# Adjust missing values and calculate ranks
|
||||
df_adjusted <- adjust_missing_and_rank(df, variables)
|
||||
|
||||
# Generate rank plots for Avg_Zscore and Z_lm
|
||||
for (var in variables) {
|
||||
plot_rank_avg <- generate_plot(
|
||||
@@ -623,13 +567,6 @@ generate_rank_plots <- function(df, output_dir) {
|
||||
}
|
||||
}
|
||||
|
||||
# Function to call rank plot generation in the main function
|
||||
create_ranked_plots <- function(df, output_dir) {
|
||||
message("Generating rank plots")
|
||||
generate_rank_plots(df, output_dir)
|
||||
}
|
||||
|
||||
|
||||
main <- function() {
|
||||
lapply(names(args$experiments), function(exp_name) {
|
||||
exp <- args$experiments[[exp_name]]
|
||||
@@ -658,11 +595,6 @@ main <- function() {
|
||||
# Get the number of rows that are above tolerance
|
||||
rows_to_remove <- nrow(df_above_tolerance)
|
||||
|
||||
# Logging or handling the calculated values, e.g.:
|
||||
message("Half-median for L (above tolerance): ", L_half_median)
|
||||
message("Half-median for K (above tolerance): ", K_half_median)
|
||||
message("Number of rows above tolerance: ", rows_to_remove)
|
||||
|
||||
# Additional filtering for non-finite values in df_na
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
||||
@@ -700,52 +632,40 @@ main <- function() {
|
||||
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", after_qc_plot_configs)
|
||||
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", no_zeros_plot_configs)
|
||||
|
||||
|
||||
# Clean up
|
||||
rm(df, df_above_tolerance, df_no_zeros)
|
||||
|
||||
|
||||
# Calculate summary statistics
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
stats_joined <- left_join(df_na, stats, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
list(summary_stats, df_na_stats) <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
# stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
|
||||
print("stats:")
|
||||
print(head(stats))
|
||||
print("Summary stats:")
|
||||
print(head(summary_stats))
|
||||
|
||||
# Originally this filtered L NA's
|
||||
# TODO: Originally this filtered L NA's
|
||||
# Let's try to avoid for now since stats have already been calculated
|
||||
|
||||
# Filter data within and outside 2SD
|
||||
within_2sd_k <- stats_joined %>%
|
||||
message("Filtering by 2SD of K")
|
||||
df_na_within_2sd_k <- df_na %>%
|
||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||
outside_2sd_k <- stats_joined %>%
|
||||
df_na_outside_2sd_k <- df_na %>%
|
||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||
|
||||
# Summary statistics for within and outside 2SD of K
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
list(l_within_2sd_k_stats, df_na_l_within_2sd_k_stats) <-
|
||||
calculate_summary_stats(df_na_l_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
message("Calculating summary statistics for L outside 2SD of K")
|
||||
list(l_outside_2sd_k_stats, df_na_l_outside_2sd_k_stats) <-
|
||||
calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
|
||||
# Write CSV files
|
||||
write.csv(l_within_2sd_k, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
||||
write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
cols_to_remove <- names(l_within_2sd_k)
|
||||
cols_to_keep <- c("conc_num", "conc_num_factor")
|
||||
within_2sd_k_clean <- within_2sd_k %>%
|
||||
select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
|
||||
l_within_2sd_k_joined <- within_2sd_k_clean %>%
|
||||
left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor"))
|
||||
|
||||
message("Calculating summary statistics for for L outside 2SD of K")
|
||||
cols_to_remove <- names(l_outside_2sd_k)
|
||||
cols_to_keep <- c("conc_num", "conc_num_factor")
|
||||
outside_2sd_k_clean <- outside_2sd_k %>%
|
||||
select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
|
||||
l_outside_2sd_k_joined <- outside_2sd_k_clean %>%
|
||||
left_join(l_outside_2sd_k, by = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
||||
write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
||||
|
||||
# Process background strains
|
||||
bg_strains <- c("YDL227C")
|
||||
@@ -767,20 +687,20 @@ main <- function() {
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
message("Calculating summary statistics for background strain")
|
||||
stats_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
write.csv(stats_bg,
|
||||
list(summary_stats_bg, df_bg_stats) <-
|
||||
calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
write.csv(summary_stats_bg,
|
||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
# stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
|
||||
# Filter reference and deletion strains
|
||||
# Formerly X2_RF (reference strain)
|
||||
df_reference <- stats_joined %>%
|
||||
df_reference <- df_bg_stats %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
# Formerly X2 (deletion strains)
|
||||
df_deletion <- stats_joined %>%
|
||||
df_deletion <- df_bg_stats %>%
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user