diff --git a/workflow/apps/r/calculate_interaction_zscores.R b/workflow/apps/r/calculate_interaction_zscores.R index af30aa3a..c739e34a 100644 --- a/workflow/apps/r/calculate_interaction_zscores.R +++ b/workflow/apps/r/calculate_interaction_zscores.R @@ -113,33 +113,37 @@ load_and_preprocess_data <- function(easy_results_file, genes) { stop("Error reading file: ", easy_results_file, "\n", e$message) }) + # Convert specific columns to numeric numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") df[numeric_columns[numeric_columns %in% colnames(df)]] <- suppressWarnings(lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)) + # Apply initial filters and mutating steps as in the original script df <- df %>% - filter(!.[[1]] %in% c("", "Scan")) %>% - filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% - filter(Drug != "BMH21") %>% - mutate( - L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}, - AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}, - conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA, - delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.))) - LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA}, - GeneName = vapply(ORF, function(orf) { - gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName) - ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name) - }, character(1)), - OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep) - ) # %>% - # mutate(across(everything(), ~na_if(., ""))) + mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>% + mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>% + filter(!.[[1]] %in% c("", "Scan")) %>% # Filter out empty or Scan rows + # {if ("Conc" %in% colnames(.)) filter(., Conc != "0ug/mL") else .} %>% + filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% # Remove blank genes + filter(Drug != "BMH21") %>% # Filter out specific drugs if necessary + filter(!is.na(ORF) & ORF != "") %>% # Ensure ORF is not NA or empty + mutate(OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep)) %>% + mutate(conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc))) %>% # Extract numeric drug concentrations + filter(!is.na(LstBackgrd) & !is.na(X1stBackgrd)) %>% # Ensure finite background values + mutate(delta_bg = LstBackgrd - X1stBackgrd) %>% # Calculate delta background + filter(!is.na(L) & !is.na(K) & !is.na(r) & !is.na(AUC96)) %>% # Ensure finite measurements + mutate(GeneName = vapply(ORF, function(orf) { + gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName) + ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name) + }, character(1))) if (nrow(df) == 0) warning("Dataframe is empty after filtering") return(df) } + + create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") { if (!("Scan" %in% colnames(df))) { warning("'Scan' column is not present in the data. Skipping this plot.") @@ -182,7 +186,6 @@ publish_summary_stats <- function(df, variables, out_dir) { fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) } -# Interaction scores publication publish_interaction_scores <- function(df, out_dir) { interaction_scores <- df %>% dplyr::group_by(OrfRep) %>% @@ -215,7 +218,7 @@ publish_interaction_scores <- function(df, out_dir) { deletion_suppressors_L <- interaction_scores[interaction_scores$mean_L <= -2, ] deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ] -deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ] + deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ] deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ] deletion_enhancers_and_suppressors_L <- interaction_scores[ @@ -239,6 +242,8 @@ deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ] return(interaction_scores) # Return the updated data frame with rank columns } + + publish_zscores <- function(df, out_dir) { zscores <- df %>% dplyr::mutate( @@ -282,6 +287,8 @@ generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { }) } + + # Create rank plots create_rank_plots <- function(interaction_scores, out_dir) { rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank") @@ -305,7 +312,6 @@ create_rank_plots <- function(interaction_scores, out_dir) { }) } - create_correlation_plot <- function(interaction_scores, out_dir) { # Check for non-finite values and remove them from the dataset interaction_scores <- interaction_scores %>% @@ -348,6 +354,7 @@ process_experiment <- function(exp_name, exp_dir, genes, output_dir) { dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) dir.create(out_dir_qc, showWarnings = FALSE) + # Load and preprocess the data data <- load_and_preprocess_data(args$easy_results_file, genes) # Calculate delta background tolerance @@ -385,4 +392,4 @@ if (length(processed_files) > 1) { combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv") fwrite(combined_data, combined_output_file, row.names = FALSE) -} +} \ No newline at end of file