library(data.table)
library(MendelianRandomization)
library(optparse)

opt_parser <- OptionParser(option_list = list(
  make_option("--outcome", type = "character", help = "Outcome dataset ID"),
  make_option("--plink-path", type = "character", help = "Path of the plink1.9 executable")
))
opt <- parse_args(opt_parser)

exposure_file <- here::here("output/data/mvmr_exposure_data_all.feather")
outcome_file <- here::here(sprintf("output/data/%s.feather", opt$outcome))

output_folder <- here::here(sprintf("output/analysis/mvmr/%s", opt$outcome))
dir.create(output_folder, recursive = TRUE, showWarnings = FALSE)

# exposure_file <- "output/data/mvmr_exposure_data_all.feather"
# outcome_file <- "output/data/birthweight.feather"

print("Reading data")
exposure_data <- arrow::read_feather(exposure_file)
outcome_data <- arrow::read_feather(outcome_file)

print("Merging exposure and outcome datasets")
joined <- merge(exposure_data, outcome_data)

# swap alleles to align with exposure data
joined[a1.x != a1.y, c("beta", "a1.y", "a2.y") := .(-beta, a2.y, a1.y)]
# remove rows where there is still a mismatch
joined <- joined[a1.x == a1.y & a2.x == a2.y]

# Use p-value from income to determine index SNP when clumping
joined[, pval := p.inc]

print("Clumping")
clumped <- ieugwasr::ld_clump(
  joined,
  clump_kb = 10000,
  clump_r2 = 0.001,
  bfile = here::here("input/ld_ref_panel/EUR"),
  plink_bin = opt$`plink-path`
)

# Save list of SNPs before and after clumping
fwrite(
  joined[, .(rsid, beta.inc, beta.edu, se.inc, se.edu, p.inc, p.edu)],
  file.path(output_folder, "snps_before_clumping.csv")
)
fwrite(
  clumped[, .(rsid, beta.inc, beta.edu, se.inc, se.edu, p.inc, p.edu)],
  file.path(output_folder, "snps_after_clumping.csv")
)

print("Running analyses (MVMR package)")
formatted <- MVMR::format_mvmr(
  BXGs = clumped[, .(beta.inc, beta.edu)],
  BYG = clumped$beta,
  seBXGs = clumped[, .(se.inc, se.edu)],
  seBYG = clumped$se,
  RSID = clumped$rsid
)

overlap <- clumped[, .(inc_sig = p.inc < 5e-8, edu_sig = p.edu < 5e-8)][, .(
  inc_only = sum(inc_sig & !edu_sig),
  edu_only = sum(edu_sig & !inc_sig),
  both = sum(inc_sig & edu_sig)
)]
fwrite(overlap, file.path(output_folder, "overlap_snp_counts.csv"))

fwrite(
  MVMR::strhet_mvmr(formatted),
  file.path(output_folder, "strhet_mvmr.csv")
)
fwrite(
  MVMR::strength_mvmr(formatted),
  file.path(output_folder, "strength_mvmr.csv")
)
fwrite(
  MVMR::ivw_mvmr(formatted),
  file.path(output_folder, "ivw_mvmr.csv")
)

mvinput <-   mr_mvinput(
  bx = as.matrix(clumped[, .(beta.inc, beta.edu)]),
  bxse = as.matrix(clumped[, .(se.inc, se.edu)]),
  by = clumped$beta,
  byse = clumped$se,
  exposure = c("income", "education"),
  outcome = opt$outcome,
  snps = clumped$rsid
)

m <- mr_mvivw(mvinput)
sink(file = file.path(output_folder, "mr_mvivw.txt"))
m
sink(file = NULL)
saveRDS(m, file.path(output_folder, "mr_mvivw.rds"))

m <- mr_mvivw(mvinput, robust = TRUE)
sink(file = file.path(output_folder, "mr_mvivw_robust.txt"))
m
sink(file = NULL)
saveRDS(m, file.path(output_folder, "mr_mvivw_robust.rds"))

m <- mr_mvegger(mvinput, orientate = 1)
sink(file = file.path(output_folder, "mr_mvegger.txt"))
m
sink(file = NULL)
saveRDS(m, file.path(output_folder, "mr_mvegger.rds"))

m <- mr_mvmedian(mvinput)
sink(file = file.path(output_folder, "mr_mvmedian.txt"))
m
sink(file = NULL)
saveRDS(m, file.path(output_folder, "mr_mvmedian.rds"))

m <- mr_mvlasso(mvinput, orientate = 1)
sink(file = file.path(output_folder, "mr_mvlasso.txt"))
m
sink(file = NULL)
saveRDS(m, file.path(output_folder, "mr_mvlasso.rds"))