Refine df filtering in calculate_interaction_zscores.R
This commit is contained in:
@@ -113,33 +113,37 @@ load_and_preprocess_data <- function(easy_results_file, genes) {
|
|||||||
stop("Error reading file: ", easy_results_file, "\n", e$message)
|
stop("Error reading file: ", easy_results_file, "\n", e$message)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Convert specific columns to numeric
|
||||||
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
|
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
|
||||||
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
|
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
|
||||||
suppressWarnings(lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric))
|
suppressWarnings(lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric))
|
||||||
|
|
||||||
|
# Apply initial filters and mutating steps as in the original script
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
filter(!.[[1]] %in% c("", "Scan")) %>%
|
mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>%
|
||||||
filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>%
|
mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>%
|
||||||
filter(Drug != "BMH21") %>%
|
filter(!.[[1]] %in% c("", "Scan")) %>% # Filter out empty or Scan rows
|
||||||
mutate(
|
# {if ("Conc" %in% colnames(.)) filter(., Conc != "0ug/mL") else .} %>%
|
||||||
L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
|
filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% # Remove blank genes
|
||||||
AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
|
filter(Drug != "BMH21") %>% # Filter out specific drugs if necessary
|
||||||
conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
|
filter(!is.na(ORF) & ORF != "") %>% # Ensure ORF is not NA or empty
|
||||||
delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
|
mutate(OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
|
||||||
LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
|
mutate(conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc))) %>% # Extract numeric drug concentrations
|
||||||
GeneName = vapply(ORF, function(orf) {
|
filter(!is.na(LstBackgrd) & !is.na(X1stBackgrd)) %>% # Ensure finite background values
|
||||||
|
mutate(delta_bg = LstBackgrd - X1stBackgrd) %>% # Calculate delta background
|
||||||
|
filter(!is.na(L) & !is.na(K) & !is.na(r) & !is.na(AUC96)) %>% # Ensure finite measurements
|
||||||
|
mutate(GeneName = vapply(ORF, function(orf) {
|
||||||
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
|
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
|
||||||
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
|
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
|
||||||
}, character(1)),
|
}, character(1)))
|
||||||
OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep)
|
|
||||||
) # %>%
|
|
||||||
# mutate(across(everything(), ~na_if(., "")))
|
|
||||||
|
|
||||||
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
|
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
|
||||||
|
|
||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
|
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
|
||||||
if (!("Scan" %in% colnames(df))) {
|
if (!("Scan" %in% colnames(df))) {
|
||||||
warning("'Scan' column is not present in the data. Skipping this plot.")
|
warning("'Scan' column is not present in the data. Skipping this plot.")
|
||||||
@@ -182,7 +186,6 @@ publish_summary_stats <- function(df, variables, out_dir) {
|
|||||||
fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Interaction scores publication
|
|
||||||
publish_interaction_scores <- function(df, out_dir) {
|
publish_interaction_scores <- function(df, out_dir) {
|
||||||
interaction_scores <- df %>%
|
interaction_scores <- df %>%
|
||||||
dplyr::group_by(OrfRep) %>%
|
dplyr::group_by(OrfRep) %>%
|
||||||
@@ -239,6 +242,8 @@ deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ]
|
|||||||
return(interaction_scores) # Return the updated data frame with rank columns
|
return(interaction_scores) # Return the updated data frame with rank columns
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
publish_zscores <- function(df, out_dir) {
|
publish_zscores <- function(df, out_dir) {
|
||||||
zscores <- df %>%
|
zscores <- df %>%
|
||||||
dplyr::mutate(
|
dplyr::mutate(
|
||||||
@@ -282,6 +287,8 @@ generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Create rank plots
|
# Create rank plots
|
||||||
create_rank_plots <- function(interaction_scores, out_dir) {
|
create_rank_plots <- function(interaction_scores, out_dir) {
|
||||||
rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
|
rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
|
||||||
@@ -305,7 +312,6 @@ create_rank_plots <- function(interaction_scores, out_dir) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
create_correlation_plot <- function(interaction_scores, out_dir) {
|
create_correlation_plot <- function(interaction_scores, out_dir) {
|
||||||
# Check for non-finite values and remove them from the dataset
|
# Check for non-finite values and remove them from the dataset
|
||||||
interaction_scores <- interaction_scores %>%
|
interaction_scores <- interaction_scores %>%
|
||||||
@@ -348,6 +354,7 @@ process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
|
|||||||
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
|
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
|
||||||
dir.create(out_dir_qc, showWarnings = FALSE)
|
dir.create(out_dir_qc, showWarnings = FALSE)
|
||||||
|
|
||||||
|
# Load and preprocess the data
|
||||||
data <- load_and_preprocess_data(args$easy_results_file, genes)
|
data <- load_and_preprocess_data(args$easy_results_file, genes)
|
||||||
|
|
||||||
# Calculate delta background tolerance
|
# Calculate delta background tolerance
|
||||||
|
|||||||
Reference in New Issue
Block a user