# join_interaction_zscores.R
#
# Combines the per-experiment z-score interaction and summary statistics CSV
# files into joint output files and merges them into one final report.
  1. suppressMessages({
  2. library("dplyr")
  3. library("data.table")
  4. library("readr")
  5. library("stringr")
  6. })
  #' Parse the script's arguments
  #'
  #' Expected layout:
  #' `<out_dir> <sd> <exp_path> <exp_name> [<exp_path> <exp_name> ...]`.
  #'
  #' @param args Optional character vector of raw arguments. When `NULL` (the
  #'   default) the built-in example arguments are used in interactive sessions
  #'   and `commandArgs(trailingOnly = TRUE)` is used otherwise.
  #' @return A list with elements `out_dir` (normalized output path), `sd`
  #'   (the second argument converted with `as.numeric()`) and `experiments`
  #'   (character vector of normalized experiment paths, named by the
  #'   experiment label that follows each path).
  parse_arguments <- function(args = NULL) {
    if (is.null(args)) {
      if (interactive()) {
        # Example arguments for interactive development sessions.
        args <- c(
          "/home/bryan/documents/develop/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
          "3", # sd value
          "/home/bryan/documents/develop/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
          "Experiment 1: Doxo versus HLD",
          "/home/bryan/documents/develop/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
          "Experiment 2: HLD versus Doxo"
        )
      } else {
        args <- commandArgs(trailingOnly = TRUE)
      }
    }
    args <- as.character(args)
    n_args <- length(args)

    # Two leading arguments plus at least one complete path/name pair are
    # required. Without this check a short argument list makes seq() fail with
    # a cryptic "wrong sign in 'by'" error, and an unpaired path silently
    # yields an experiment with an NA name.
    if (n_args < 4L || n_args %% 2L != 0L) {
      stop(
        "Usage: <out_dir> <sd> <exp_path> <exp_name> ",
        "[<exp_path> <exp_name> ...]; got ", n_args, " argument(s).",
        call. = FALSE
      )
    }

    path_idx <- seq(3L, n_args, by = 2L)
    exp_paths <- normalizePath(args[path_idx], mustWork = FALSE)
    exp_names <- args[path_idx + 1L]

    list(
      out_dir = normalizePath(args[1], mustWork = FALSE),
      sd = as.numeric(args[2]),
      experiments = setNames(exp_paths, exp_names)
    )
  }
  # Parse the arguments once; the resulting list is used by the calls below.
  args <- parse_arguments()

  # Ensure main output directory exists. dir.create() is silenced so that an
  # already existing directory is not reported, which also hides real creation
  # failures; check the result explicitly so the script stops here instead of
  # failing later while writing the combined files.
  dir.create(args$out_dir, showWarnings = FALSE, recursive = TRUE)
  if (!dir.exists(args$out_dir)) {
    stop("Could not create output directory: ", args$out_dir, call. = FALSE)
  }
  #' Combine the z-score interaction files of all experiments
  #'
  #' Reads `zscores/zscore_interactions.csv` from every experiment directory,
  #' adds an `Experiment` column holding the experiment's name, stacks the
  #' tables and writes the result to `combined_zscores.csv` in `out_dir`.
  #' Stops with an error if an experiment's file does not exist.
  #'
  #' @param experiments Named vector of experiment directories; the names are
  #'   used as experiment labels.
  #' @param out_dir Directory the combined file is written to.
  combine_zscores <- function(experiments, out_dir) {
    # Load a single experiment's table and tag its rows with the label.
    load_zscores <- function(label) {
      source_csv <- file.path(
        experiments[[label]], "zscores", "zscore_interactions.csv"
      )
      if (!file.exists(source_csv)) {
        stop("Z-score file does not exist for ", label, " at ", source_csv)
      }
      message("Reading z-score file for ", label, " from ", source_csv)
      tbl <- fread(source_csv)
      tbl$Experiment <- label
      tbl
    }

    per_experiment <- lapply(names(experiments), load_zscores)
    stacked <- bind_rows(per_experiment)

    target_csv <- file.path(out_dir, "combined_zscores.csv")
    fwrite(stacked, target_csv, row.names = FALSE)
    message("Combined z-score file saved to: ", target_csv)
  }
  #' Combine the summary statistics files of all experiments
  #'
  #' Reads `zscores/summary_stats_all_strains.csv` from every experiment
  #' directory, adds an `Experiment` column holding the experiment's name,
  #' stacks the tables and writes the result to `combined_summary_stats.csv`
  #' in `out_dir`. Stops with an error if an experiment's file does not exist.
  #'
  #' @param experiments Named vector of experiment directories; the names are
  #'   used as experiment labels.
  #' @param out_dir Directory the combined file is written to.
  combine_summary_stats <- function(experiments, out_dir) {
    # Load a single experiment's summary table and tag its rows with the label.
    load_summary <- function(label) {
      source_csv <- file.path(
        experiments[[label]], "zscores", "summary_stats_all_strains.csv"
      )
      if (!file.exists(source_csv)) {
        stop("Summary stats file does not exist for ", label, " at ", source_csv)
      }
      message("Reading summary stats file for ", label, " from ", source_csv)
      tbl <- fread(source_csv)
      tbl$Experiment <- label
      tbl
    }

    per_experiment <- lapply(names(experiments), load_summary)
    stacked <- bind_rows(per_experiment)

    target_csv <- file.path(out_dir, "combined_summary_stats.csv")
    fwrite(stacked, target_csv, row.names = FALSE)
    message("Combined summary stats file saved to: ", target_csv)
  }
  #' Generate the final combined report
  #'
  #' Reads `combined_zscores.csv` and `combined_summary_stats.csv` from
  #' `out_dir`, merges them on `OrfRep` and `Experiment` keeping unmatched rows
  #' from both sides, and writes the result to `final_combined_report.csv` in
  #' the same directory. Stops with an error if either input file is missing.
  #'
  #' @param out_dir Directory holding the combined inputs; the report is
  #'   written here as well.
  generate_final_report <- function(out_dir) {
    zscores_csv <- file.path(out_dir, "combined_zscores.csv")
    stats_csv <- file.path(out_dir, "combined_summary_stats.csv")
    report_csv <- file.path(out_dir, "final_combined_report.csv")

    # Both combined inputs must be present before anything is read.
    if (!(file.exists(zscores_csv) && file.exists(stats_csv))) {
      stop("Combined z-scores or summary stats files do not exist.")
    }

    zscores_tbl <- fread(zscores_csv)
    stats_tbl <- fread(stats_csv)

    message("Merging z-score and summary stats data...")
    join_keys <- c("OrfRep", "Experiment")
    report_tbl <- merge(
      zscores_tbl,
      stats_tbl,
      by = join_keys,
      all = TRUE,
      allow.cartesian = TRUE
    )

    fwrite(report_tbl, report_csv, row.names = FALSE)
    message("Final combined report saved to: ", report_csv)
  }
  # Process all experiments and generate outputs: write both combined files
  # first, then build the final report from them.
  for (combine in list(combine_zscores, combine_summary_stats)) {
    combine(args$experiments, args$out_dir)
  }
  generate_final_report(args$out_dir)