Use a single data frame for process_strains()

This commit is contained in:
2024-09-02 15:36:45 -04:00
parent 756075a7e5
commit 40395d26e7

View File

@@ -270,21 +270,16 @@ save_plots <- function(file_name, plot_list, output_dir) {
} }
# Process strains (deletion and reference) # Process strains (deletion and reference)
process_strains <- function(df, l_within_2sd_k, strain) { process_strains <- function(df) {
df_strains <- data.frame() # Initialize an empty dataframe to store results df_strains <- data.frame() # Initialize an empty dataframe to store results
print(names(l_within_2sd_k))
for (concentration in unique(df$conc_num)) { for (concentration in unique(df$conc_num)) {
message("Processing concentration: ", concentration)
df_temp <- df %>% filter(conc_num == concentration) df_temp <- df %>% filter(conc_num == concentration)
if (concentration > 0) { if (concentration > 0) {
max_l_theoretical <- l_within_2sd_k %>% max_l_theoretical <- df_temp %>% pull(L_max)
filter(conc_num_factor == concentration) %>%
pull(L_max)
df_temp <- df_temp %>% df_temp <- df_temp %>%
mutate( mutate(
L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L), # Replace zero values with max theoretical L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L), # Replace zero values with max theoretical
@@ -292,7 +287,8 @@ process_strains <- function(df, l_within_2sd_k, strain) {
L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) # Cap L values L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) # Cap L values
) )
} }
df_strains <- bind_rows(df_strains, df_temp) # Append the results of this concentration # Append the results of this concentration to df_strains
df_strains <- bind_rows(df_strains, df_temp)
} }
return(df_strains) return(df_strains)
@@ -585,27 +581,20 @@ main <- function() {
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
# Remove existing calculated summary statistics and add the new ones
cols_to_remove <- names(l_within_2sd_k) cols_to_remove <- names(l_within_2sd_k)
cols_to_keep <- c("conc_num", "conc_num_factor") cols_to_keep <- c("conc_num", "conc_num_factor")
within_2sd_k_clean <- within_2sd_k %>% within_2sd_k_clean <- within_2sd_k %>%
select(-all_of(setdiff(cols_to_remove, cols_to_keep))) select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
l_within_2sd_k_joined <- within_2sd_k_clean %>% l_within_2sd_k_joined <- within_2sd_k_clean %>%
left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor")) left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor"))
# print("within_2sd_k")
# print(head(within_2sd_k))
# print("l_within_2sd_k")
#l_within_2sd_k_joined <- merge(within_2sd_k, l_within_2sd_k, by = c("conc_num", "conc_num_factor"), all.x = TRUE) # print(head(l_within_2sd_k))
print("within_2sd_k") # print("l_within_2sd_k_joined")
print(head(within_2sd_k)) # print(head(l_within_2sd_k_joined))
print("l_within_2sd_k")
print(head(l_within_2sd_k))
print("l_within_2sd_k_joined")
print(head(l_within_2sd_k_joined))
quit()
write.csv(l_within_2sd_k, write.csv(l_within_2sd_k,
file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
@@ -655,9 +644,9 @@ main <- function() {
mutate(SM = 0) mutate(SM = 0)
message("Processing reference strain") message("Processing reference strain")
reference_strain <- process_strains(df_reference, l_within_2sd_k_joined, strain) reference_strain <- process_strains(l_within_2sd_k_joined)
message("Processing deletion strains") message("Processing deletion strains")
deletion_strains <- process_strains(df_deletion, l_within_2sd_k_joined, strain) deletion_strains <- process_strains(l_within_2sd_k_joined)
# TODO we may need to add "num" to grouping vars # TODO we may need to add "num" to grouping vars