Procházet zdrojové kódy

Revert load_and_process_data

Bryan Roessler před 7 měsíci
rodič
revize
d565f7f95f
Změněn 1 soubor: 10 přidání a 26 odebrání
  1. 10 26
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 10 - 26
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -115,42 +115,26 @@ scale_colour_publication <- function(...) {
 
 # Load the initial dataframe from the easy_results_file
 load_and_process_data <- function(easy_results_file, sd = 3) {
-  df <- read.delim(
-    easy_results_file,
-    skip = 2,
-    stringsAsFactors = FALSE,
-    row.names = 1,
-    strip.white = TRUE
-  )
-
-  # Filter and rename columns
-  df <- df %>%
-    filter(!is.na(ORF) & ORF != "") %>%
-    filter(!Gene %in% c("BLANK", "Blank", "blank")) %>%
-    filter(Drug != "BMH21") %>%
-    rename(
-      L = l,
-      num = Num.,
-      AUC = AUC96,
-      scan = Scan,
-      last_bg = LstBackgrd,
-      first_bg = X1stBackgrd
-    ) %>%
-    mutate(across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric))
-
-  # Calculate delta background and tolerance
+  df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
+  
   df <- df %>%
+    filter(!(.[[1]] %in% c("", "Scan"))) %>%
+    filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
+    # Rename columns
+    rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>%
     mutate(
+      across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
       delta_bg = last_bg - first_bg,
       delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
       NG = if_else(L == 0 & !is.na(L), 1, 0),
       DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
       SM = 0,
-      OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
+      OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
       conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
       conc_num_factor = as.factor(conc_num)
+      # conc_num_factor = factor(conc_num, levels = sort(unique(conc_num)))
     )
-
+  
   return(df)
 }