Fix y~x predictor column names
This commit is contained in:
@@ -217,7 +217,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
group_vars <- c("OrfRep", "Gene", "Drug")
|
group_vars <- c("OrfRep", "Gene", "Drug")
|
||||||
}
|
}
|
||||||
|
|
||||||
perform_lm_simple <- function(x, y, max_conc) {
|
perform_lm <- function(x, y, max_conc) {
|
||||||
if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) {
|
if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) {
|
||||||
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
|
return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
|
||||||
} else {
|
} else {
|
||||||
@@ -316,10 +316,10 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
mutate(
|
mutate(
|
||||||
# Apply the simple LM function for each variable
|
# Apply the simple LM function for each variable
|
||||||
lm_L = list(perform_lm_simple(Delta_L, conc_num_factor, max_conc)),
|
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
|
||||||
lm_K = list(perform_lm_simple(Delta_K, conc_num_factor, max_conc)),
|
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
|
||||||
lm_r = list(perform_lm_simple(Delta_r, conc_num_factor, max_conc)),
|
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
|
||||||
lm_AUC = list(perform_lm_simple(Delta_AUC, conc_num_factor, max_conc)),
|
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
|
||||||
|
|
||||||
# Extract coefficients and statistics for each model
|
# Extract coefficients and statistics for each model
|
||||||
lm_intercept_L = lm_L[[1]]$intercept,
|
lm_intercept_L = lm_L[[1]]$intercept,
|
||||||
@@ -480,19 +480,19 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
|
||||||
TRUE ~ "No Effect"
|
TRUE ~ "No Effect"
|
||||||
),
|
),
|
||||||
# Apply the perform_lm_simple function for each variable pair
|
# Apply the perform_lm function for each variable pair
|
||||||
lm_L = list(perform_lm_simple(Z_lm_L, Avg_Zscore_L, max_conc)),
|
lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
|
||||||
lm_K = list(perform_lm_simple(Z_lm_K, Avg_Zscore_K, max_conc)),
|
lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
|
||||||
lm_r = list(perform_lm_simple(Z_lm_r, Avg_Zscore_r, max_conc)),
|
lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
|
||||||
lm_AUC = list(perform_lm_simple(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
|
lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
|
||||||
|
|
||||||
# Correlation models for various pairs
|
# Correlation models for various pairs
|
||||||
Z_lm_K_L = list(perform_lm_simple(Z_lm_K, Z_lm_L, max_conc)),
|
Z_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
|
||||||
Z_lm_r_L = list(perform_lm_simple(Z_lm_r, Z_lm_L, max_conc)),
|
Z_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
|
||||||
Z_lm_R_AUC_L = list(perform_lm_simple(Z_lm_AUC, Z_lm_L, max_conc)),
|
Z_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
|
||||||
Z_lm_R_r_K = list(perform_lm_simple(Z_lm_r, Z_lm_K, max_conc)),
|
Z_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
|
||||||
Z_lm_R_AUC_K = list(perform_lm_simple(Z_lm_AUC, Z_lm_K, max_conc)),
|
Z_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
|
||||||
Z_lm_R_AUC_r = list(perform_lm_simple(Z_lm_AUC, Z_lm_r, max_conc)),
|
Z_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)),
|
||||||
|
|
||||||
# Extract coefficients and statistics for each model
|
# Extract coefficients and statistics for each model
|
||||||
lm_rank_intercept_L = lm_L[[1]]$intercept,
|
lm_rank_intercept_L = lm_L[[1]]$intercept,
|
||||||
@@ -525,29 +525,29 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared,
|
Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared,
|
||||||
Z_lm_Score_r_L = Z_lm_r_L[[1]]$score,
|
Z_lm_Score_r_L = Z_lm_r_L[[1]]$score,
|
||||||
|
|
||||||
Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L[[1]]$intercept,
|
Z_lm_intercept_AUC_L = Z_lm_AUC_L[[1]]$intercept,
|
||||||
Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L[[1]]$slope,
|
Z_lm_slope_AUC_L = Z_lm_AUC_L[[1]]$slope,
|
||||||
Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L[[1]]$r_squared,
|
Z_lm_R_squared_AUC_L = Z_lm_AUC_L[[1]]$r_squared,
|
||||||
Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L[[1]]$score,
|
Z_lm_Score_AUC_L = Z_lm_AUC_L[[1]]$score,
|
||||||
|
|
||||||
Z_lm_intercept_R_r_K = Z_lm_R_r_K[[1]]$intercept,
|
Z_lm_intercept_r_K = Z_lm_r_K[[1]]$intercept,
|
||||||
Z_lm_slope_R_r_K = Z_lm_R_r_K[[1]]$slope,
|
Z_lm_slope_r_K = Z_lm_r_K[[1]]$slope,
|
||||||
Z_lm_R_squared_R_r_K = Z_lm_R_r_K[[1]]$r_squared,
|
Z_lm_R_squared_r_K = Z_lm_r_K[[1]]$r_squared,
|
||||||
Z_lm_Score_R_r_K = Z_lm_R_r_K[[1]]$score,
|
Z_lm_Score_r_K = Z_lm_r_K[[1]]$score,
|
||||||
|
|
||||||
Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K[[1]]$intercept,
|
Z_lm_intercept_AUC_K = Z_lm_AUC_K[[1]]$intercept,
|
||||||
Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K[[1]]$slope,
|
Z_lm_slope_AUC_K = Z_lm_AUC_K[[1]]$slope,
|
||||||
Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K[[1]]$r_squared,
|
Z_lm_R_squared_AUC_K = Z_lm_AUC_K[[1]]$r_squared,
|
||||||
Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K[[1]]$score,
|
Z_lm_Score_AUC_K = Z_lm_AUC_K[[1]]$score,
|
||||||
|
|
||||||
Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r[[1]]$intercept,
|
Z_lm_intercept_AUC_r = Z_lm_AUC_r[[1]]$intercept,
|
||||||
Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r[[1]]$slope,
|
Z_lm_slope_AUC_r = Z_lm_AUC_r[[1]]$slope,
|
||||||
Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r[[1]]$r_squared,
|
Z_lm_R_squared_AUC_r = Z_lm_AUC_r[[1]]$r_squared,
|
||||||
Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r[[1]]$score
|
Z_lm_Score_AUC_r = Z_lm_AUC_r[[1]]$score
|
||||||
) %>%
|
) %>%
|
||||||
select(
|
select(
|
||||||
-lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,
|
-lm_L, -lm_K, -lm_r, -lm_AUC,
|
||||||
-Z_lm_R_r_K, -Z_lm_R_AUC_K, -Z_lm_R_AUC_r)
|
-Z_lm_K_L, -Z_lm_r_L, -Z_lm_AUC_L, -Z_lm_r_K, -Z_lm_AUC_K, -Z_lm_AUC_r)
|
||||||
} # end deletion-specific block
|
} # end deletion-specific block
|
||||||
|
|
||||||
# Create the final calculations and interactions dataframes with only required columns for csv output
|
# Create the final calculations and interactions dataframes with only required columns for csv output
|
||||||
@@ -573,15 +573,27 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
|
|||||||
|
|
||||||
df_interactions <- interactions %>%
|
df_interactions <- interactions %>%
|
||||||
select(
|
select(
|
||||||
all_of(group_vars),
|
any_of(c(
|
||||||
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
group_vars,
|
||||||
Sum_Z_Score_L, Sum_Z_Score_K, Sum_Z_Score_r, Sum_Z_Score_AUC,
|
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
|
||||||
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
||||||
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
|
||||||
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||||
lm_Score_L, lm_Score_K, lm_Score_r, lm_Score_AUC,
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||||
R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC,
|
"lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
|
||||||
NG_sum_int, DB_sum_int, SM_sum_int
|
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
||||||
|
"NG_sum_int", "DB_sum_int", "SM_sum_int",
|
||||||
|
"Z_lm_intercept_L", "Z_lm_slope_L", "Z_lm_R_squared_L", "Z_lm_Score_L",
|
||||||
|
"Z_lm_intercept_K", "Z_lm_slope_K", "Z_lm_R_squared_K", "Z_lm_Score_K",
|
||||||
|
"Z_lm_intercept_r", "Z_lm_slope_r", "Z_lm_R_squared_r", "Z_lm_Score_r",
|
||||||
|
"Z_lm_intercept_AUC", "Z_lm_slope_AUC", "Z_lm_R_squared_AUC", "Z_lm_Score_AUC",
|
||||||
|
"Z_lm_intercept_K_L", "Z_lm_slope_K_L", "Z_lm_R_squared_K_L", "Z_lm_Score_K_L",
|
||||||
|
"Z_lm_intercept_r_L", "Z_lm_slope_r_L", "Z_lm_R_squared_r_L", "Z_lm_Score_r_L",
|
||||||
|
"Z_lm_intercept_AUC_L", "Z_lm_slope_AUC_L", "Z_lm_R_squared_AUC_L", "Z_lm_Score_AUC_L",
|
||||||
|
"Z_lm_intercept_r_K", "Z_lm_slope_r_K", "Z_lm_R_squared_r_K", "Z_lm_Score_r_K",
|
||||||
|
"Z_lm_intercept_AUC_K", "Z_lm_slope_AUC_K", "Z_lm_R_squared_AUC_K", "Z_lm_Score_AUC_K",
|
||||||
|
"Z_lm_intercept_AUC_r", "Z_lm_slope_AUC_r", "Z_lm_R_squared_AUC_r", "Z_lm_Score_AUC_r"
|
||||||
|
))
|
||||||
) %>%
|
) %>%
|
||||||
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
|
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
|
||||||
|
|
||||||
@@ -1296,21 +1308,28 @@ generate_correlation_plot_configs <- function(df, df_reference) {
|
|||||||
x_var <- paste0("Z_lm_", rel$x) # predictor
|
x_var <- paste0("Z_lm_", rel$x) # predictor
|
||||||
y_var <- paste0("Z_lm_", rel$y) # response
|
y_var <- paste0("Z_lm_", rel$y) # response
|
||||||
|
|
||||||
# Skip this plot if there's no valid data
|
print(paste("rel$x:", rel$x))
|
||||||
# if (all(is.na(df[[x_var]])) || all(is.na(df_reference[[x_var]])) ||
|
print(paste("rel$y:", rel$y))
|
||||||
# all(is.na(df[[y_var]])) || all(is.na(df_reference[[y_var]]))) {
|
print(paste("Generating correlation plot for response(y):", y_var, "and predictor(x):", x_var))
|
||||||
# next
|
print(paste("Relationship suffix:", rel$y, "_", rel$x))
|
||||||
# }
|
|
||||||
|
|
||||||
# Find the max and min of both dataframes for printing linear regression line
|
|
||||||
xmin <- min(c(min(df[[x_var]], na.rm = TRUE), min(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
|
|
||||||
xmax <- max(c(max(df[[x_var]], na.rm = TRUE), max(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
|
|
||||||
|
|
||||||
# Extract the R-squared, intercept, and slope from the df (first value)
|
# Extract the R-squared, intercept, and slope from the df (first value)
|
||||||
intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1]
|
intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1]
|
||||||
slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1]
|
slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1]
|
||||||
r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1]
|
r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1]
|
||||||
|
|
||||||
|
print(paste("intercept:", intercept))
|
||||||
|
print(paste("slope:", slope))
|
||||||
|
print(paste("r_squared:", r_squared))
|
||||||
|
|
||||||
|
r_squared_rounded <- round(r_squared, 4)
|
||||||
|
r_squared_label <- paste("R-squared =", r_squared_rounded)
|
||||||
|
print(paste("r_squared_label:", r_squared_label))
|
||||||
|
|
||||||
|
# Find the max and min of both dataframes for printing linear regression line
|
||||||
|
xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]])))
|
||||||
|
xmax <- max(c(max(df[[x_var]]), max(df_reference[[x_var]])))
|
||||||
|
|
||||||
# Generate the label for the plot
|
# Generate the label for the plot
|
||||||
plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
|
plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
|
||||||
|
|
||||||
@@ -1326,7 +1345,7 @@ generate_correlation_plot_configs <- function(df, df_reference) {
|
|||||||
list(
|
list(
|
||||||
x = mean(df[[x_var]], na.rm = TRUE),
|
x = mean(df[[x_var]], na.rm = TRUE),
|
||||||
y = mean(df[[y_var]], na.rm = TRUE),
|
y = mean(df[[y_var]], na.rm = TRUE),
|
||||||
label = paste("R-squared =", round(r_squared, 3))
|
label = r_squared_label
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
lm_line = list(
|
lm_line = list(
|
||||||
|
|||||||
Reference in New Issue
Block a user