Fix placement of l_witin_2sd_k

This commit is contained in:
2024-09-01 19:58:40 -04:00
parent fd03fbb205
commit 6fe47421df

View File

@@ -245,27 +245,6 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
return(summary_stats) return(summary_stats)
} }
# Calculate L values within and outside 2SD of K
calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
# Join the statistics to the main dataframe
df_joined <- df %>%
filter(!is.na(L)) %>%
left_join(df_stats_by_k, by = "conc_num_factor")
# Filter data within 2SD and outside 2SD
df_within_2sd_k <- df_joined %>%
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
df_outside_2sd_k <- df_joined %>%
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
# Select relevant columns to avoid duplicated columns from the join
df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k)
}
# Ensure all plots are saved and printed to PDF # Ensure all plots are saved and printed to PDF
save_plots <- function(file_name, plot_list, output_dir) { save_plots <- function(file_name, plot_list, output_dir) {
# Save to PDF # Save to PDF
@@ -294,7 +273,6 @@ save_plots <- function(file_name, plot_list, output_dir) {
}) })
} }
# Calculate background strain mean values # Calculate background strain mean values
calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) { calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
list( list(
@@ -573,10 +551,13 @@ main <- function() {
# Flag and remove non-finite data, printing affected rows # Flag and remove non-finite data, printing affected rows
df_na_filtered <- df_na %>% df_na_filtered <- df_na %>%
filter(if_any(c(L, r, AUC, K), ~ !is.finite(.))) %>% filter(if_any(c(L), ~ !is.finite(.))) %>% # Add L, r, AUC, K as needed for debugging
{ {
if (nrow(.) > 0) message("Removing non-finite rows:\n", print(.)) if (nrow(.) > 0) {
df_na %>% filter(if_all(c(L, r, AUC, K), is.finite)) message("Removing non-finite rows:\n")
#print(.)
}
df_na %>% filter(if_all(c(L), is.finite)) # Add L, r, AUC, K as needed for debugging
} }
# # Generate QC PDFs and HTMLs # # Generate QC PDFs and HTMLs
@@ -596,11 +577,34 @@ main <- function() {
write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
stats_joined <- left_join(df_na, stats, by = c("conc_num", "conc_num_factor")) stats_joined <- left_join(df_na, stats, by = c("conc_num", "conc_num_factor"))
# Create separate dataframes for each variable (we'll use later for plotting)
stats_by_l <- stats_joined %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_l <- stats_joined %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_k <- stats_joined %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_k <- stats_joined %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num_factor")) # TODO may need to add OrfRep, conc_num, etc
# Filter data within 2SD
within_2sd_k <- stats_by_k_joined %>%
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
# Filter data outside 2SD
outside_2sd_k <- stats_by_k_joined %>%
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
# Calculate summary statistics for L within and outside 2SD of K
message("Calculating summary statistics for L within 2SD of K")
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_within_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
row.names = FALSE)
message("Calculating summary statistics for for L outside 2SD of K")
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_outside_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
row.names = FALSE)
# Process background strains # Process background strains
background_strains <- c("YDL227C") background_strains <- c("YDL227C")
lapply(background_strains, function(strain) { lapply(background_strains, function(strain) {
@@ -631,26 +635,6 @@ main <- function() {
row.names = FALSE) row.names = FALSE)
stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
# Filter L values within and outside 2SD of K
results_2sd <- calculate_l_2sd_of_k(df_bg, stats_by_k_bg)
within_2sd_k <- results_2sd$within_2sd_k
outside_2sd_k <- results_2sd$outside_2sd_k
# Calculate summary statistics for L within and outside 2SD of K
message("Calculating summary statistics for for L within 2SD of K")
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_within_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
row.names = FALSE)
message("Calculating summary statistics for for L outside 2SD of K")
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
write.csv(l_outside_2sd_k,
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
row.names = FALSE)
message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots")
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
message("Calculating background means") message("Calculating background means")
background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg) background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg)