calculate_pairwise_lk.R 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. suppressMessages({
  2. library("ggplot2")
  3. library("plotly")
  4. library("htmlwidgets")
  5. library("extrafont")
  6. library("grid")
  7. library("ggthemes")
  8. })
  9. # Constants for configuration
  10. PLOT_WIDTH <- 12
  11. PLOT_HEIGHT <- 8
  12. BASE_SIZE <- 14
  13. options(warn = 2, max.print = 100)
  14. # Parse arguments
  15. parse_arguments <- function() {
  16. if (interactive()) {
  17. args <- c(
  18. "Exp1",
  19. "/path/to/exp1_file.csv",
  20. "Exp2",
  21. "/path/to/exp2_file.csv",
  22. "/path/to/output_dir"
  23. )
  24. } else {
  25. args <- commandArgs(trailingOnly = TRUE)
  26. }
  27. list(
  28. exp1_name = args[1],
  29. exp1_file = normalizePath(args[2], mustWork = TRUE),
  30. exp2_name = args[3],
  31. exp2_file = normalizePath(args[4], mustWork = TRUE),
  32. output_dir = normalizePath(args[5], mustWork = FALSE)
  33. )
  34. }
  35. args <- parse_arguments()
  36. # Create output directories if they don't exist
  37. pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
  38. pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
  39. dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
  40. dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
  41. # Define themes and scales
  42. theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
  43. theme_foundation(base_size = base_size, base_family = base_family) +
  44. theme(
  45. plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
  46. panel.background = element_rect(colour = NA),
  47. plot.background = element_rect(colour = NA),
  48. panel.border = element_rect(colour = NA),
  49. axis.title = element_text(face = "bold", size = rel(1)),
  50. axis.line = element_line(colour = "black"),
  51. panel.grid.major = element_line(colour = "#f0f0f0"),
  52. panel.grid.minor = element_blank(),
  53. legend.position = "bottom",
  54. legend.direction = "horizontal",
  55. legend.key.size = unit(0.2, "cm"),
  56. plot.margin = unit(c(10, 5, 5, 5), "mm"),
  57. strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
  58. strip.text = element_text(face = "bold")
  59. )
  60. }
  61. theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
  62. theme_publication(base_size, base_family) +
  63. theme(
  64. legend.position = "right",
  65. legend.direction = "vertical",
  66. legend.key.size = unit(0.5, "cm")
  67. )
  68. }
  69. scale_fill_publication <- function(...) {
  70. discrete_scale("fill", "Publication", manual_pal(values = c(
  71. "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
  72. "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  73. )), ...)
  74. }
  75. scale_colour_publication <- function(...) {
  76. discrete_scale("colour", "Publication", manual_pal(values = c(
  77. "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
  78. "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  79. )), ...)
  80. }
  81. # Read and merge data
  82. load_and_merge_data <- function(file1, file2) {
  83. df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
  84. df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
  85. merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
  86. }
  87. # Generate a plot and save to PDF and HTML
  88. generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
  89. gg <- ggplot(data = data, aes_string(
  90. x = x_var,
  91. y = y_var,
  92. color = color_var
  93. )) +
  94. xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
  95. geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  96. geom_point(shape = 3) +
  97. ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
  98. ggtitle(title) +
  99. theme_function()
  100. pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
  101. print(gg)
  102. dev.off()
  103. pgg <- ggplotly(gg)
  104. htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
  105. }
  106. # Identify and annotate specific interactions
  107. annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
  108. df$Overlap_Avg <- NA
  109. interactions <- list(
  110. "df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
  111. "df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
  112. "df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
  113. "df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
  114. "Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
  115. "Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
  116. "df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
  117. "df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
  118. )
  119. annotation <- list(
  120. df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
  121. df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
  122. df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
  123. df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
  124. Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
  125. Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
  126. df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
  127. df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
  128. )
  129. for (key in names(interactions)) {
  130. try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
  131. }
  132. df
  133. }
  134. # Rank and filter data
  135. rank_and_filter_data <- function(df, suffix) {
  136. z1 <- df
  137. z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
  138. z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
  139. z2 <- df
  140. z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
  141. z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
  142. df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
  143. df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
  144. df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
  145. list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
  146. }
  147. # Main execution function for a pairwise comparison
  148. run_pairwise_comparison <- function(suffix, dir) {
  149. df <- load_and_merge_data(args$exp1_file, args$exp2_file)
  150. # Generate initial ontology-based plot
  151. generate_plot(df,
  152. paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
  153. paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
  154. file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
  155. theme_publication_legend_right)
  156. # Annotate interactions and generate overlap-based plot
  157. df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
  158. ranks <- rank_and_filter_data(df, suffix)
  159. generate_plot(df,
  160. paste0("Z_lm_", suffix, "_Avg_df1"),
  161. paste0("Z_lm_", suffix, "_Avg_df2"),
  162. "Overlap_Avg",
  163. paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
  164. file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
  165. theme_publication_legend_right)
  166. generate_plot(ranks$df_above_threshold,
  167. paste0("Z_lm_", suffix, "_Avg_df1"),
  168. paste0("Z_lm_", suffix, "_Avg_df2"),
  169. "Overlap_Avg",
  170. paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
  171. file.path(dir,
  172. paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
  173. theme_publication_legend_right)
  174. # Save CSV files
  175. write.csv(df, file.path(dir,
  176. paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
  177. row.names = FALSE)
  178. write.csv(ranks$df_above_threshold,
  179. file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
  180. row.names = FALSE)
  181. }
  182. # Execute Pairwise L and Pairwise K comparisons
  183. run_pairwise_comparison("L", pairDirL)
  184. run_pairwise_comparison("K", pairDirK)