library(data.table) library(MendelianRandomization) library(optparse) opt_parser <- OptionParser(option_list = list( make_option("--plink-path", type = "character", help = "Path of the plink1.9 executable") )) opt <- parse_args(opt_parser) income_file <- "output/data/income_kweon.feather" education_file <- "output/data/ieu-a-1239.feather" print("Reading data") inc_data <- arrow::read_feather(income_file) edu_data <- arrow::read_feather(education_file) print("Merging exposure datasets") significant_snps <- c( inc_data[p < 5e-8, rsid], edu_data[p < 5e-8, rsid] ) significant <- merge( inc_data[rsid %in% significant_snps], edu_data[rsid %in% significant_snps], suffixes = c(".inc", ".edu") ) print("Aligning effect alleles") # swap alleles to align with income data significant[a1.inc != a1.edu, c("beta.edu", "a1.edu", "a2.edu") := .(-beta.edu, a2.edu, a1.edu)] # remove rows where there is still a mismatch significant <- significant[a1.inc == a1.edu & a2.inc == a2.edu] setnames(significant, "a1.inc", "a1") setnames(significant, "a2.inc", "a2") significant[, c("a1.edu", "a2.edu", "chr.inc", "chr.edu", "bp.inc", "bp.edu") := NULL] # Use p-value from inc (income) to determine index SNP when clumping significant[, pval := p.inc] print("Clumping") clumped <- ieugwasr::ld_clump( significant, clump_kb = 10000, clump_r2 = 0.001, bfile = here::here("input/ld_ref_panel/EUR"), plink_bin = opt$`plink-path` ) arrow::write_feather(significant, here::here("output/data/mvmr_exposure_data_all.feather")) arrow::write_feather(clumped, here::here("output/data/mvmr_exposure_data_clumped.feather"))