Fix placement of l_witin_2sd_k
This commit is contained in:
@@ -245,27 +245,6 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
return(summary_stats)
|
||||
}
|
||||
|
||||
# Calculate L values within and outside 2SD of K
|
||||
calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
|
||||
# Join the statistics to the main dataframe
|
||||
df_joined <- df %>%
|
||||
filter(!is.na(L)) %>%
|
||||
left_join(df_stats_by_k, by = "conc_num_factor")
|
||||
|
||||
# Filter data within 2SD and outside 2SD
|
||||
df_within_2sd_k <- df_joined %>%
|
||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||
|
||||
df_outside_2sd_k <- df_joined %>%
|
||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||
|
||||
# Select relevant columns to avoid duplicated columns from the join
|
||||
df_within_2sd_k <- df_within_2sd_k %>% select(names(df))
|
||||
df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df))
|
||||
|
||||
list(within_2sd_k = df_within_2sd_k, outside_2sd_k = df_outside_2sd_k)
|
||||
}
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
save_plots <- function(file_name, plot_list, output_dir) {
|
||||
# Save to PDF
|
||||
@@ -294,7 +273,6 @@ save_plots <- function(file_name, plot_list, output_dir) {
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
# Calculate background strain mean values
|
||||
calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
||||
list(
|
||||
@@ -573,11 +551,14 @@ main <- function() {
|
||||
|
||||
# Flag and remove non-finite data, printing affected rows
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L, r, AUC, K), ~ !is.finite(.))) %>%
|
||||
{
|
||||
if (nrow(.) > 0) message("Removing non-finite rows:\n", print(.))
|
||||
df_na %>% filter(if_all(c(L, r, AUC, K), is.finite))
|
||||
}
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>% # Add L, r, AUC, K as needed for debugging
|
||||
{
|
||||
if (nrow(.) > 0) {
|
||||
message("Removing non-finite rows:\n")
|
||||
#print(.)
|
||||
}
|
||||
df_na %>% filter(if_all(c(L), is.finite)) # Add L, r, AUC, K as needed for debugging
|
||||
}
|
||||
|
||||
# # Generate QC PDFs and HTMLs
|
||||
# message("Generating QC plots")
|
||||
@@ -596,10 +577,33 @@ main <- function() {
|
||||
write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
stats_joined <- left_join(df_na, stats, by = c("conc_num", "conc_num_factor"))
|
||||
|
||||
# Create separate dataframes for each variable (we'll use later for plotting)
|
||||
stats_by_l <- stats_joined %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k <- stats_joined %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
|
||||
stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num_factor")) # TODO may need to add OrfRep, conc_num, etc
|
||||
|
||||
# Filter data within 2SD
|
||||
within_2sd_k <- stats_by_k_joined %>%
|
||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||
|
||||
# Filter data outside 2SD
|
||||
outside_2sd_k <- stats_by_k_joined %>%
|
||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||
|
||||
# Calculate summary statistics for L within and outside 2SD of K
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_within_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
message("Calculating summary statistics for for L outside 2SD of K")
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_outside_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
# Process background strains
|
||||
background_strains <- c("YDL227C")
|
||||
@@ -630,26 +634,6 @@ main <- function() {
|
||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||
|
||||
# Filter L values within and outside 2SD of K
|
||||
results_2sd <- calculate_l_2sd_of_k(df_bg, stats_by_k_bg)
|
||||
within_2sd_k <- results_2sd$within_2sd_k
|
||||
outside_2sd_k <- results_2sd$outside_2sd_k
|
||||
|
||||
# Calculate summary statistics for L within and outside 2SD of K
|
||||
message("Calculating summary statistics for for L within 2SD of K")
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_within_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
message("Calculating summary statistics for for L outside 2SD of K")
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_outside_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Generating Raw_L_vs_K_for_strains_outside_2sd_k plots")
|
||||
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||
|
||||
message("Calculating background means")
|
||||
background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg)
|
||||
|
||||
Reference in New Issue
Block a user