diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index ecb4816a..46df32d9 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -119,10 +119,11 @@ load_and_process_data <- function(easy_results_file, sd = 3) { mutate( Col = as.numeric(Col), Row = as.numeric(Row), + num = as.numeric(Num.), L = as.numeric(l), K = as.numeric(K), r = as.numeric(r), - Scan = as.numeric(Scan), + scan = as.numeric(Scan), AUC = as.numeric(AUC96), last_bg = as.numeric(LstBackgrd), first_bg = as.numeric(X1stBackgrd), @@ -133,8 +134,9 @@ load_and_process_data <- function(easy_results_file, sd = 3) { OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), conc_num_factor = as.numeric(as.factor(conc_num)) - 1, - max_conc = max(conc_num_factor) - ) + max_conc = max(conc_num_factor)) + + return(df) } # Function to update Gene names using the SGD gene list @@ -150,10 +152,12 @@ update_gene_names <- function(df, sgd_gene_list) { mutate(Gene = ifelse(OrfRep != "YDL227C", gene_map[[ORF]], Gene)) %>% ungroup() %>% mutate(Gene = ifelse(Gene == "" | Gene == "OCT1", OrfRep, Gene)) + + return(df) } generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num", title, x_label = NULL, y_label = NULL) { - plot <- ggplot(df, aes_string(x = x_var, y = y_var, color = color_var)) + plot <- ggplot(df, aes(x = {{x_var}}, y = {{y_var}}, color = {{color_var}})) if (plot_type == "scatter") { plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") + @@ -179,20 +183,24 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q plots <- list() for (var in variables) { - scatter_plot <- generate_plot(df, "Scan", var, "scatter", title = paste(prefix, "Scatter Plot for", var)) - boxplot <- generate_plot(df, "Scan", var, "box", title = paste(prefix, "Box Plot for", var)) + scatter_plot <- + generate_plot(df, x_var = scan, y_var = !!sym(var), plot_type = "scatter", title = paste(prefix, "Scatter Plot for", var)) + boxplot <- + generate_plot(df, x_var = as.factor(scan), y_var = !!sym(var), plot_type = "box", title = paste(prefix, "Box Plot for", var)) plots[[paste0(var, "_scatter")]] <- scatter_plot plots[[paste0(var, "_box")]] <- boxplot } if (include_qc) { - plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", title = "Raw L vs K before QC") - plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", title = "Density plot for Delta Background by Conc All Data") - plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", title = "Bar plot for Delta Background by Conc All Data") + plots[["Raw_L_vs_K"]] <- + generate_plot(df, x_var = L, y_var = K, plot_type = "scatter", title = "Raw L vs K before QC") + plots[["Delta_bg_Density"]] <- + generate_plot(df, x_var = delta_bg, plot_type = "density", title = "Density plot for Delta Background by Conc All Data") + plots[["Delta_bg_Bar"]] <- + generate_plot(df, x_var = delta_bg, plot_type = "bar", title = "Bar plot for Delta Background by Conc All Data") } - # Save the generated plots using the unified function save_plots(prefix, plots, output_dir) } @@ -263,18 +271,9 @@ calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by } # Function to process strains (deletion and reference) - -process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir, strain_type = "deletion") { - df_strains <- if (strain_type == "deletion") { - df %>% filter(OrfRep != strain) - } else { - df %>% filter(OrfRep == strain) - } - - df_strains <- df_strains %>% mutate(SM = 0) - - for (concentration in unique(df_strains$conc_num)) { - df_temp <- df_strains %>% filter(conc_num == concentration) +process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) { + for (concentration in unique(df$conc_num)) { + df_temp <- df %>% filter(conc_num == concentration) if (concentration > 0) { max_l_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) df_temp <- df_temp %>% @@ -284,12 +283,166 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir, L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) ) } - df_strains <- bind_rows(df_strains, df_temp) + df_strains <- bind_rows(df, df_temp) } - + return(df_strains) } +calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l, df_stats_by_k, + df_stats_by_r, df_stats_by_auc, background_means, + max_conc, variables, out_dir, is_reference = TRUE) { + interaction_scores <- interaction_scores %>% + mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA + + for (i in seq_len(nrow(interaction_scores))) { + gene_sel <- interaction_scores$OrfRep[i] + df_gene_sel <- df %>% filter(OrfRep == gene_sel) + + df_stats_interaction <- df_gene_sel %>% + group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% + summarise( + N = n(), + across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE), + NG = sum(NG, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) + ) %>% + ungroup() + + shifts <- df_stats_interaction %>% + mutate( + raw_shift_l = L_mean - background_means$L, + z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1], + raw_shift_k = K_mean - background_means$K, + z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1], + raw_shift_r = r_mean - background_means$r, + z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1], + raw_shift_auc = AUC_mean - background_means$AUC, + z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1] + ) + + linear_models <- shifts %>% + summarise( + lm_l = list(lm(L_mean ~ conc_num_factor)), + lm_k = list(lm(K_mean ~ conc_num_factor)), + lm_r = list(lm(r_mean ~ conc_num_factor)), + lm_auc = list(lm(AUC_mean ~ conc_num_factor)) + ) + + z_scores <- shifts %>% + mutate( + exp_l = background_means$L + raw_shift_l, + delta_l = L_mean - exp_l, + exp_k = background_means$K + raw_shift_k, + delta_k = K_mean - exp_k, + exp_r = background_means$r + raw_shift_r, + delta_r = r_mean - exp_r, + exp_auc = background_means$AUC + raw_shift_auc, + delta_auc = AUC_mean - exp_auc, + zscore_l = delta_l / df_stats_by_l$L_sd, + zscore_k = delta_k / df_stats_by_k$K_sd, + zscore_r = delta_r / df_stats_by_r$r_sd, + zscore_auc = delta_auc / df_stats_by_auc$AUC_sd, + sum_z_score_l = sum(zscore_l, na.rm = TRUE), + avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_k = sum(zscore_k, na.rm = TRUE), + avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_r = sum(zscore_r, na.rm = TRUE), + avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)), + sum_z_score_auc = sum(zscore_auc, na.rm = TRUE), + avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE)) + ) + + # Update interaction_scores with calculated values + interaction_scores[i, ] <- interaction_scores[i, ] %>% + mutate( + Raw_Shift_L = shifts$raw_shift_l[1], + Z_Shift_L = shifts$z_shift_l[1], + lm_Score_L = max_conc * coef(linear_models$lm_l)[2] + coef(linear_models$lm_l)[1], + R_Squared_L = summary(linear_models$lm_l)$r.squared, + Sum_Z_Score_L = z_scores$sum_z_score_l[1], + Avg_Zscore_L = z_scores$avg_zscore_l[1], + Raw_Shift_K = shifts$raw_shift_k[1], + Z_Shift_K = shifts$z_shift_k[1], + lm_Score_K = max_conc * coef(linear_models$lm_k)[2] + coef(linear_models$lm_k)[1], + R_Squared_K = summary(linear_models$lm_k)$r.squared, + Sum_Z_Score_K = z_scores$sum_z_score_k[1], + Avg_Zscore_K = z_scores$avg_zscore_k[1], + Raw_Shift_r = shifts$raw_shift_r[1], + Z_Shift_r = shifts$z_shift_r[1], + lm_Score_r = max_conc * coef(linear_models$lm_r)[2] + coef(linear_models$lm_r)[1], + R_Squared_r = summary(linear_models$lm_r)$r.squared, + Sum_Z_Score_r = z_scores$sum_z_score_r[1], + Avg_Zscore_r = z_scores$avg_zscore_r[1], + Raw_Shift_AUC = shifts$raw_shift_auc[1], + Z_Shift_AUC = shifts$z_shift_auc[1], + lm_Score_AUC = max_conc * coef(linear_models$lm_auc)[2] + coef(linear_models$lm_auc)[1], + R_Squared_AUC = summary(linear_models$lm_auc)$r.squared, + Sum_Z_Score_AUC = z_scores$sum_z_score_auc[1], + Avg_Zscore_AUC = z_scores$avg_zscore_auc[1], + NG = sum(df_stats_interaction$NG, na.rm = TRUE), + DB = sum(df_stats_interaction$DB, na.rm = TRUE), + SM = sum(df_stats_interaction$SM, na.rm = TRUE) + ) + + # If deletion strain, append to the final interaction statistics + if (!is_reference) { + if (i == 1) { + df_stats_interaction_all <- df_stats_interaction + } else { + df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) + } + } + } + + # If deletion strain, return the final interaction statistics along with the scores + if (!is_reference) { + return(list(interaction_scores = interaction_scores, df_stats_interaction_all = df_stats_interaction_all)) + } + + return(interaction_scores) +} + + +filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) { + + interaction_scores <- interaction_scores %>% + arrange(desc(Z_lm_L)) %>% + arrange(desc(NG)) + + output_files <- list( + paste0(prefix, "_ZScores_Interaction.csv") = interaction_scores, + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K.csv") = filter(interaction_scores, Avg_Zscore_K <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2), + paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv") = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2), + paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv") = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2), + paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv") = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2), + paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv") = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2) + ) + + output_files_lm <- list( + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K_lm.csv") = filter(interaction_scores, Z_lm_K <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2), + paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2) + ) + + for (file_name in names(output_files)) { + write.csv(output_files[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE) + } + + for (file_name in names(output_files_lm)) { + write.csv(output_files_lm[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE) + } +} + + @@ -401,18 +554,91 @@ main <- function() { background_means <- calculate_background_means(df_stats_by_l_background, df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background) + + df_reference <- df_na %>% + filter(OrfRep == strain) %>% + mutate(SM = 0) + + df_deletion <- df_na %>% + filter(OrfRep != strain) %>% + mutate(SM = 0) - df_deletion_strains <- process_strains(df_na, df_stats_by_l_within_2sd_k, strain, out_dir, "deletion") - write.csv(df_deletion_strains, - file = file.path(output_dir, paste0("Processed_", strain_type, "_strains_", strain, ".csv")), + df_reference_strains <- process_strains(df_reference, df_stats_by_l_within_2sd_k, strain, out_dir) + write.csv(df_reference_strains, + file = file.path(output_dir, paste0("Processed_reference_strains_", strain, ".csv")), row.names = FALSE) - df_reference_strains <- process_strains(df_na, df_stats_by_l_within_2sd_k, strain, out_dir, "reference") - write.csv(df_strains, - file = file.path(output_dir, paste0("Processed_", strain_type, "_strains_", strain, ".csv")), + df_deletion_strains <- process_strains(df_deletion, df_stats_by_l_within_2sd_k, strain, out_dir) + write.csv(df_deletion_strains, + file = file.path(output_dir, paste0("Processed_deletion_strains_", strain, ".csv")), row.names = FALSE) - - # Further processing or calculations using df_deletion_strains and df_reference_strains + + # TODO: we are mutating OrfRep to make it more unique, is this sensible? + # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score + df_reference_strains <- df_reference_strains %>% + mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) + + num_genes <- length(unique(df_reference_strains$OrfRep)) + print(num_genes) + + # TODO: Is this necessary? + interaction_scores_reference <- df_reference_strains %>% + distinct(OrfRep) %>% + mutate( + Gene = NA, + raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, + r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, + raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, + r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, + raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, + r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, + raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, + r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, + NG = NA, SM = NA + ) + + interaction_scores_reference <- calculate_interaction_scores( + interaction_scores_reference, df_reference_strains, df_stats_by_l, df_stats_by_k, + df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = TRUE) + + write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) + + interaction_scores_deletion <- df_deletion_strains %>% + distinct(OrfRep) %>% + mutate( + Gene = NA, + raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA, + r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA, + raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA, + r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA, + raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA, + r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA, + raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA, + r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA, + NG = NA, DB = NA, SM = NA + ) + + result <- calculate_interaction_scores( + interaction_scores_deletion, df_deletion_strains, df_stats_by_l, df_stats_by_k, + df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = FALSE) + + interaction_scores_deletion <- result$interaction_scores + df_stats_interaction_all <- result$df_stats_interaction_all + + write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) + write.csv(df_stats_interaction_all, file = file.path(out_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) + + # Filter and save interaction scores for reference strains + filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF") + + # Filter and save interaction scores for deletion strains + filter_and_save_interaction_scores(interaction_scores_deletion, out_dir, "Deletion") + + + # Generate plots for interaction scores + # plot_interaction_scores(interaction_scores_reference, out_dir) # Assuming plot_interaction_scores is your plotting function + + }) }) } @@ -420,339 +646,17 @@ main() - - - for (s in background_strains) { - - # Get the background strain mean values at the no-drug concentration (conc_num_factor = 0) - background_mean_l <- df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L) - background_mean_k <- df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K) - background_mean_r <- df_stats_by_r %>% filter(conc_num_factor == 0) %>% pull(mean_r) - background_mean_auc <- df_stats_by_auc %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) - # Initialize empty plots (placeholder for future plotting) - p_l <- ggplot() - p_k <- ggplot() - p_r <- ggplot() - p_auc <- ggplot() - p_rf_l <- ggplot() - p_rf_k <- ggplot() - p_rf_r <- ggplot() - p_rf_auc <- ggplot() - - # Get only the deletion strains (excluding the background strain) - df_deletion_strains <- df %>% filter(OrfRep != s) - - # Initialize SM (Set to Max) column - df_deletion_strains <- df_deletion_strains %>% mutate(SM = 0) - - # Set missing values to the highest theoretical value at each drug concentration for L - df_deletion_strains_new <- data.frame() - - for (i in seq_along(unique(df_deletion_strains$conc_num))) { - concentration <- unique(df_deletion_strains$conc_num)[i] - df_temp <- df_deletion_strains %>% filter(conc_num == concentration) - - if (concentration == 0) { - df_deletion_strains_new <- df_temp - message(paste("Check loop order, conc =", concentration)) - } else { - max_L_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) - - df_temp <- df_temp %>% - mutate( - L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L), - SM = ifelse(L >= max_L_theoretical & !is.na(L), 1, SM), - L = ifelse(L >= max_L_theoretical & !is.na(L), max_L_theoretical, L) - ) - - df_deletion_strains_new <- bind_rows(df_deletion_strains_new, df_temp) - message(paste("Check loop order, conc =", concentration)) - } - } - - df_deletion_strains <- df_deletion_strains_new - - # Get only the reference strains (background strain) - df_reference_strains <- df %>% filter(OrfRep == s) - - # Initialize SM (Set to Max) column - df_reference_strains <- df_reference_strains %>% mutate(SM = 0) - - # Set missing values to the highest theoretical value at each drug concentration for L - df_reference_strains_new <- data.frame() - - for (i in seq_along(unique(df_reference_strains$conc_num))) { - concentration <- unique(df_reference_strains$conc_num)[i] - df_rf_temp <- df_reference_strains %>% filter(conc_num == concentration) - - if (concentration == 0) { - df_reference_strains_new <- df_rf_temp - message(paste("Check loop order, conc =", concentration)) - } else { - max_L_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) - - df_rf_temp <- df_rf_temp %>% - mutate( - L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L), - SM = ifelse(L >= max_L_theoretical & !is.na(L), 1, SM), - L = ifelse(L >= max_L_theoretical & !is.na(L), max_L_theoretical, L) - ) - - df_reference_strains_new <- bind_rows(df_reference_strains_new, df_rf_temp) - message(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =", concentration)) - } - } - - df_reference_strains <- df_reference_strains_new - - df_RF <- df_RF_new - - # Get the RF Z score values - - # Change the OrfRep column to include the RF strain, Gene name, and Num so each RF gets its own score - df_RF <- df_RF %>% - mutate(OrfRep = paste(OrfRep, Gene, Num, sep = "_")) - - num_genes_RF <- length(unique(df_RF$OrfRep)) - print(num_genes_RF) - - # Create the output dataframe containing columns for each RF strain - interaction_scores_RF <- df_RF %>% - distinct(OrfRep) %>% - mutate( - Gene = NA, - Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, Sum_Z_Score_L = NA, Avg_Zscore_L = NA, - Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, Sum_Z_Score_K = NA, Avg_Zscore_K = NA, - Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, Sum_Z_Score_r = NA, Avg_Zscore_r = NA, - Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, - NG = NA, SM = NA - ) - - for (i in seq_len(num_genes_RF)) { - # Get each deletion strain ORF - gene_sel <- unique(df_RF$OrfRep)[i] - - # Extract only the current deletion strain and its data - df_gene_sel <- df_RF %>% filter(OrfRep == gene_sel) - - # Calculate summary statistics for the selected gene - df_stats_interaction <- df_gene_sel %>% - group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% - summarise( - N = n(), - mean_L = mean(L, na.rm = TRUE), - median_L = median(L, na.rm = TRUE), - sd_L = sd(L, na.rm = TRUE), - se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - se_K = sd_K / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - se_AUC = sd_AUC / sqrt(N - 1), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) - - # Get shift values - if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Raw_Shift_L = 0, Z_Shift_L = 0, - Raw_Shift_K = 0, Z_Shift_K = 0, - Raw_Shift_r = 0, Z_Shift_r = 0, - Raw_Shift_AUC = 0, Z_Shift_AUC = 0 - ) - } else { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Raw_Shift_L = mean_L[1] - background_mean_l, - Z_Shift_L = Raw_Shift_L / df_stats_by_l$sd[1], - Raw_Shift_K = mean_K[1] - background_mean_k, - Z_Shift_K = Raw_Shift_K / df_stats_by_k$sd[1], - Raw_Shift_r = mean_r[1] - background_mean_r, - Z_Shift_r = Raw_Shift_r / df_stats_by_r$sd[1], - Raw_Shift_AUC = mean_AUC[1] - background_mean_auc, - Z_Shift_AUC = Raw_Shift_AUC / df_stats_by_auc$sd[1] - ) - } - - # Add wild-type (WT) values and standard deviations - df_stats_interaction <- df_stats_interaction %>% - mutate( - WT_l = df_stats_by_l$mean, WT_sd_l = df_stats_by_l$sd, - WT_K = df_stats_by_k$mean, WT_sd_K = df_stats_by_k$sd, - WT_r = df_stats_by_r$mean, WT_sd_r = df_stats_by_r$sd, - WT_AUC = df_stats_by_auc$mean, WT_sd_AUC = df_stats_by_auc$sd - ) - - # Calculate scores if there is growth at no drug concentration - if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Exp_L = WT_l + Raw_Shift_L, - Exp_K = WT_K + Raw_Shift_K, - Exp_r = WT_r + Raw_Shift_r, - Exp_AUC = WT_AUC + Raw_Shift_AUC, - Delta_L = mean_L - Exp_L, - Delta_K = mean_K - Exp_K, - Delta_r = mean_r - Exp_r, - Delta_AUC = mean_AUC - Exp_AUC, - Zscore_L = Delta_L / WT_sd_l, - Zscore_K = Delta_K / WT_sd_K, - Zscore_r = Delta_r / WT_sd_r, - Zscore_AUC = Delta_AUC / WT_sd_AUC - ) - - # Handle no growth (NG) and set to max (SM) values - if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Delta_L = ifelse(NG == 1, mean_L - WT_l, Delta_L), - Delta_K = ifelse(NG == 1, mean_K - WT_K, Delta_K), - Delta_r = ifelse(NG == 1, mean_r - WT_r, Delta_r), - Delta_AUC = ifelse(NG == 1, mean_AUC - WT_AUC, Delta_AUC) - ) - } - - if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate(Delta_L = ifelse(SM == 1, mean_L - WT_l, Delta_L)) - } - - # Calculate linear models - gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) - gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) - gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) - gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) - - # Calculate interaction scores and R-squared values - gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] - r_squared_l <- summary(gene_lm_L)$r.squared - gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] - r_squared_K <- summary(gene_lm_K)$r.squared - gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] - r_squared_r <- summary(gene_lm_r)$r.squared - gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] - r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - # Total non-removed concentrations - num_non_removed_conc <- Total_Conc_Nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 - - # Report the scores - interaction_scores_RF <- interaction_scores_RF %>% - mutate( - Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), - Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), - lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), - R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), - Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), - Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), - Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), - Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), - lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), - R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), - Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), - Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), - Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), - Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), - lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), - R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), - Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), - Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / num_non_removed_conc), - Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), - Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), - lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), - R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), - Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), - Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / num_non_removed_conc) - ) - } else { - # Handle case where mean_L is 0 or NA - interaction_scores_RF <- interaction_scores_RF %>% - mutate( - Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), - Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), - lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), - R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), - Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), - Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), - Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), - Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), - lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), - R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), - Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), - Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), - Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), - Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), - lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), - R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), - Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), - Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), - Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), - Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), - lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), - R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), - Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), - Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) - ) - } - - # Append the interaction statistics for all RFs - if (i == 1) { - df_stats_interaction_all_RF <- df_stats_interaction - } else { - df_stats_interaction_all_RF <- bind_rows(df_stats_interaction_all_RF, df_stats_interaction) - } - - # Add NG, DB, and SM values to the InteractionScores_RF dataframe - interaction_scores_RF <- interaction_scores_RF %>% - mutate( - NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), - DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), - SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) - ) - } - - print("Pass RF Calculation loop") - - # Calculate summary statistics for the linear models - lm_sd_L <- sd(interaction_scores_RF$lm_Score_L, na.rm = TRUE) - lm_sd_K <- sd(interaction_scores_RF$lm_Score_K, na.rm = TRUE) - lm_sd_r <- sd(interaction_scores_RF$lm_Score_r, na.rm = TRUE) - lm_sd_AUC <- sd(interaction_scores_RF$lm_Score_AUC, na.rm = TRUE) - - lm_mean_L <- mean(interaction_scores_RF$lm_Score_L, na.rm = TRUE) - lm_mean_K <- mean(interaction_scores_RF$lm_Score_K, na.rm = TRUE) - lm_mean_r <- mean(interaction_scores_RF$lm_Score_r, na.rm = TRUE) - lm_mean_AUC <- mean(interaction_scores_RF$lm_Score_AUC, na.rm = TRUE) - - print(paste("Mean RF linear regression score L:", lm_mean_L)) - - # Calculate Z scores for the linear models - interaction_scores_RF <- interaction_scores_RF %>% - mutate( - Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, - Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K, - Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r, - Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC - ) - - # Sort the dataframe by Z_lm_L and NG (No Growth) - interaction_scores_RF <- interaction_scores_RF %>% - arrange(desc(Z_lm_L), desc(NG)) - - write.csv(interaction_scores_RF, file = file.path(output_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) + # p_l <- ggplot() + # p_k <- ggplot() + # p_r <- ggplot() + # p_auc <- ggplot() + # p_rf_l <- ggplot() + # p_rf_k <- ggplot() + # p_rf_r <- ggplot() + # p_rf_auc <- ggplot() # Generate ggplot objects for each RF strain for (i in seq_len(num_genes_RF)) { @@ -816,214 +720,214 @@ for (s in background_strains) { scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + theme_publication() - # Append the final interaction statistics for all RFs - if (i == 1) { - df_stats_interaction_all_RF_final <- df_z_calculations - } else { - df_stats_interaction_all_RF_final <- bind_rows(df_stats_interaction_all_RF_final, df_z_calculations) - } + # # Append the final interaction statistics for all RFs + # if (i == 1) { + # df_stats_interaction_all_RF_final <- df_z_calculations + # } else { + # df_stats_interaction_all_RF_final <- bind_rows(df_stats_interaction_all_RF_final, df_z_calculations) + # } } - print("Pass RF ggplot loop") + # print("Pass RF ggplot loop") - # Save the final interaction statistics - write.csv(df_stats_interaction_all_RF_final, file = file.path(output_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) + # # Save the final interaction statistics + # write.csv(df_stats_interaction_all_RF_final, file = file.path(output_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) ###### Part 5 - Get Z-scores for Gene Deletion Strains # Get the total number of genes for the loop - num_genes <- length(unique(df_deletion$OrfRep)) - print(num_genes) + # num_genes <- length(unique(df_deletion$OrfRep)) + # print(num_genes) - # Create the output dataframe containing columns for each deletion strain - interaction_scores_deletion <- unique(df_deletion["OrfRep"]) - interaction_scores_deletion <- interaction_scores_deletion %>% - mutate( - Gene = NA, - Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, - Sum_Z_Score_L = NA, Avg_Zscore_L = NA, - Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, - Sum_Z_Score_K = NA, Avg_Zscore_K = NA, - Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, - Sum_Z_Score_r = NA, Avg_Zscore_r = NA, - Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, - Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, - NG = NA, DB = NA, SM = NA - ) + # # Create the output dataframe containing columns for each deletion strain + # interaction_scores_deletion <- unique(df_deletion["OrfRep"]) + # interaction_scores_deletion <- interaction_scores_deletion %>% + # mutate( + # Gene = NA, + # Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, + # Sum_Z_Score_L = NA, Avg_Zscore_L = NA, + # Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, + # Sum_Z_Score_K = NA, Avg_Zscore_K = NA, + # Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, + # Sum_Z_Score_r = NA, Avg_Zscore_r = NA, + # Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, + # Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, + # NG = NA, DB = NA, SM = NA + # ) - for (i in seq_len(num_genes)) { - gene_sel <- unique(df_deletion$OrfRep)[i] - df_gene_sel <- df_deletion %>% filter(OrfRep == gene_sel) + # for (i in seq_len(num_genes)) { + # gene_sel <- unique(df_deletion$OrfRep)[i] + # df_gene_sel <- df_deletion %>% filter(OrfRep == gene_sel) - df_stats_interaction <- df_gene_sel %>% - group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% - summarise( - N = length(L), mean_L = mean(L, na.rm = TRUE), median_L = median(L, na.rm = TRUE), - sd_L = sd(L, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), - mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), - mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), - NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) - ) %>% - ungroup() + # df_stats_interaction <- df_gene_sel %>% + # group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% + # summarise( + # N = length(L), mean_L = mean(L, na.rm = TRUE), median_L = median(L, na.rm = TRUE), + # sd_L = sd(L, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), + # mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), + # sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), + # mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), + # sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), + # mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), + # sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), + # NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) + # ) %>% + # ungroup() - if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Raw_Shift_L = 0, Raw_Shift_K = 0, Raw_Shift_r = 0, Raw_Shift_AUC = 0, - Z_Shift_L = 0, Z_Shift_K = 0, Z_Shift_r = 0, Z_Shift_AUC = 0 - ) - } else { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Raw_Shift_L = mean_L[1] - background_L, Raw_Shift_K = mean_K[1] - background_K, - Raw_Shift_r = mean_r[1] - background_r, Raw_Shift_AUC = mean_AUC[1] - background_AUC, - Z_Shift_L = Raw_Shift_L[1] / df_stats_BY_L$sd[1], - Z_Shift_K = Raw_Shift_K[1] / df_stats_BY_K$sd[1], - Z_Shift_r = Raw_Shift_r[1] / df_stats_BY_r$sd[1], - Z_Shift_AUC = Raw_Shift_AUC[1] / df_stats_BY_AUC$sd[1] - ) - } + # if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # Raw_Shift_L = 0, Raw_Shift_K = 0, Raw_Shift_r = 0, Raw_Shift_AUC = 0, + # Z_Shift_L = 0, Z_Shift_K = 0, Z_Shift_r = 0, Z_Shift_AUC = 0 + # ) + # } else { + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # Raw_Shift_L = mean_L[1] - background_L, Raw_Shift_K = mean_K[1] - background_K, + # Raw_Shift_r = mean_r[1] - background_r, Raw_Shift_AUC = mean_AUC[1] - background_AUC, + # Z_Shift_L = Raw_Shift_L[1] / df_stats_BY_L$sd[1], + # Z_Shift_K = Raw_Shift_K[1] / df_stats_BY_K$sd[1], + # Z_Shift_r = Raw_Shift_r[1] / df_stats_BY_r$sd[1], + # Z_Shift_AUC = Raw_Shift_AUC[1] / df_stats_BY_AUC$sd[1] + # ) + # } - df_stats_interaction <- df_stats_interaction %>% - mutate( - WT_l = df_stats_BY_L$mean, WT_K = df_stats_BY_K$mean, - WT_r = df_stats_BY_r$mean, WT_AUC = df_stats_BY_AUC$mean, - WT_sd_l = df_stats_BY_L$sd, WT_sd_K = df_stats_BY_K$sd, - WT_sd_r = df_stats_BY_r$sd, WT_sd_AUC = df_stats_BY_AUC$sd - ) + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # WT_l = df_stats_BY_L$mean, WT_K = df_stats_BY_K$mean, + # WT_r = df_stats_BY_r$mean, WT_AUC = df_stats_BY_AUC$mean, + # WT_sd_l = df_stats_BY_L$sd, WT_sd_K = df_stats_BY_K$sd, + # WT_sd_r = df_stats_BY_r$sd, WT_sd_AUC = df_stats_BY_AUC$sd + # ) - if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Exp_L = WT_l + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, - Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, - Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, - Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC - ) + # if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # Exp_L = WT_l + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, + # Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, + # Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, + # Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC + # ) - if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate( - Delta_L = if_else(NG == 1, mean_L - WT_l, Delta_L), - Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), - Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), - Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC) - ) - } + # if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # Delta_L = if_else(NG == 1, mean_L - WT_l, Delta_L), + # Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), + # Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), + # Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC) + # ) + # } - if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { - df_stats_interaction <- df_stats_interaction %>% - mutate(Delta_L = if_else(SM == 1, mean_L - WT_l, Delta_L)) - } + # if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { + # df_stats_interaction <- df_stats_interaction %>% + # mutate(Delta_L = if_else(SM == 1, mean_L - WT_l, Delta_L)) + # } - df_stats_interaction <- df_stats_interaction %>% - mutate( - Zscore_L = Delta_L / WT_sd_l, Zscore_K = Delta_K / WT_sd_K, - Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC - ) + # df_stats_interaction <- df_stats_interaction %>% + # mutate( + # Zscore_L = Delta_L / WT_sd_l, Zscore_K = Delta_K / WT_sd_K, + # Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC + # ) - gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) - gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) - gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) - gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) + # gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) + # gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) + # gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) + # gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) - gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] - gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] - gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] - gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] + # gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] + # gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] + # gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] + # gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] - r_squared_l <- summary(gene_lm_L)$r.squared - r_squared_K <- summary(gene_lm_K)$r.squared - r_squared_r <- summary(gene_lm_r)$r.squared - r_squared_AUC <- summary(gene_lm_AUC)$r.squared + # r_squared_l <- summary(gene_lm_L)$r.squared + # r_squared_K <- summary(gene_lm_K)$r.squared + # r_squared_r <- summary(gene_lm_r)$r.squared + # r_squared_AUC <- summary(gene_lm_AUC)$r.squared - num_non_removed_conc <- total_conc_nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 + # num_non_removed_conc <- total_conc_nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 - interaction_scores_deletion <- interaction_scores_deletion %>% - mutate( - Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), - Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), - Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), - lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), - Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, (gene_interaction_L - lm_mean_L) / lm_sd_L), - R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), - Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), - Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), - Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), - Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), - lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), - Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, (gene_interaction_K - lm_mean_K) / lm_sd_K), - R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), - Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), - Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), - Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), - Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), - lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), - Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, (gene_interaction_r - lm_mean_r) / lm_sd_r), - R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), - Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), - Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)), - Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), - Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), - lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), - Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC), - R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), - Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), - Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)) - ) - } else { - # Similar logic for when mean_L is 0 or NA, setting relevant variables to NA or appropriate values - interaction_scores_deletion <- interaction_scores_deletion %>% - mutate( - Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), - Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), - Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), - lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), - Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, NA), - R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), - Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), - Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), - Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), - Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), - lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), - Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, NA), - R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), - Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), - Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), - Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), - Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), - lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), - Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, NA), - R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), - Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), - Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), - Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), - Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), - lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), - Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, NA), - R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), - Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), - Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) - ) - } + # interaction_scores_deletion <- interaction_scores_deletion %>% + # mutate( + # Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), + # Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), + # Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), + # lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), + # Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, (gene_interaction_L - lm_mean_L) / lm_sd_L), + # R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), + # Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), + # Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), + # Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), + # Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), + # lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), + # Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, (gene_interaction_K - lm_mean_K) / lm_sd_K), + # R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), + # Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), + # Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), + # Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), + # Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), + # lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), + # Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, (gene_interaction_r - lm_mean_r) / lm_sd_r), + # R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), + # Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), + # Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)), + # Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), + # Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), + # lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), + # Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC), + # R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), + # Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), + # Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)) + # ) + # } else { + # # Similar logic for when mean_L is 0 or NA, setting relevant variables to NA or appropriate values + # interaction_scores_deletion <- interaction_scores_deletion %>% + # mutate( + # Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), + # Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), + # Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), + # lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), + # Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, NA), + # R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), + # Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), + # Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), + # Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), + # Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), + # lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), + # Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, NA), + # R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), + # Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), + # Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), + # Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), + # Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), + # lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), + # Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, NA), + # R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), + # Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), + # Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), + # Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), + # Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), + # lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), + # Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, NA), + # R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), + # Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), + # Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) + # ) + # } - if (i == 1) { - df_stats_interaction_all <- df_stats_interaction - } else { - df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) - } + # if (i == 1) { + # df_stats_interaction_all <- df_stats_interaction + # } else { + # df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) + # } - interaction_scores_deletion <- interaction_scores_deletion %>% - mutate( - NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), - DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), - SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) - ) - } + # interaction_scores_deletion <- interaction_scores_deletion %>% + # mutate( + # NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), + # DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), + # SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) + # ) + # } print("Pass Int Calculation loop")