---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(ggplot2)
library(PNWColors)
```

```{r}
# All locations are absolute paths built from one project root, so the
# notebook does not need to change the working directory.
project_dir <- "/Users/isabelserrano/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses"

outdir_figures <- file.path(project_dir, "experimental_design", "figures")
# NOTE(review): outdir_files is not used anywhere in this notebook; kept in
# case it is relied on interactively.
outdir_files <- file.path(project_dir, "experimental_design", "files")

nonreversion_muts_file <- file.path(
  project_dir, "input_files", "somatic_mutations.vcf"
)
nonreversion_muts <- read.table(
  nonreversion_muts_file,
  header = TRUE,
  stringsAsFactors = FALSE
)

reversion_muts_file <- file.path(
  project_dir, "input_files", "haplotype_mutations.vcf"
)
reversion_muts <- read.table(
  reversion_muts_file,
  header = TRUE,
  stringsAsFactors = FALSE
)
```

```{r}
# Plot components shared by the per-sample frequency figures below.

# Manual colour assigned to each STRAIN label.
strain_colors <- c(
  "B6" = "#1d457f",
  "AKR" = "#cc5c76",
  "ALR" = "#2f9e23",
  "F" = "#f57946",
  "NZB" = "#f7c22d"
)

# theme_bw() with blank facet strips, rotated small x labels and the legend
# placed below the panel.
hap_site_theme <- theme_bw() +
  theme(
    strip.background = element_blank(),
    text = element_text(family = "sans"),
    axis.text.x = element_text(angle = 90, size = 6),
    legend.position = "bottom"
  )

# Layers common to every SAMPLE-vs-MUT_FREQ plot; added to a ggplot as a list.
hap_site_layers <- list(
  geom_point(),
  ylab("Mutation\n Frequency"),
  xlab("Sample"),
  scale_color_manual(values = strain_colors),
  hap_site_theme
)

# Write `plot` to `filename` inside `out_dir` as a PDF.
# The device is closed through on.exit() so it does not stay open if
# print() fails part-way through.
# Returns `plot` invisibly.
save_plot_pdf <- function(plot, filename, width = 9, height = 3,
                          out_dir = outdir_figures) {
  pdf(file.path(out_dir, filename), width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}
```

Combine all our variants info:

```{r}
all_variants <- rbind(nonreversion_muts, reversion_muts)
```

```{r}
# The separate tables are no longer needed once they are combined.
rm(nonreversion_muts, reversion_muts)
```

```{r}
# Per-sample frequency of the T>C variant at position 9460.
nd3_hap_site <- all_variants %>%
  select(
    SAMPLE, STRAIN, TISSUE, START, REF, ALT,
    ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS
  ) %>%
  filter(START == 9460, REF == "T", ALT == "C") %>%
  mutate(MUT_FREQ = ALT_ALLELE_DEPTH / READ_DEPTH_AT_POS)

# Display the B6 heart rows for this site.
nd3_hap_site %>%
  filter(STRAIN == "B6", TISSUE == "Heart")
```

```{r}
nd3_site_plot <- ggplot(
  nd3_hap_site,
  aes(x = SAMPLE, y = MUT_FREQ, color = STRAIN, shape = TISSUE)
) +
  hap_site_layers

save_plot_pdf(nd3_site_plot, "nd3_hap_site_freq.pdf")
```

```{r}
# Per-sample frequency of the C>A variant at 4738 and the G>A variant at 9347.
alr_hap_sites <- all_variants %>%
  select(
    SAMPLE, STRAIN, TISSUE, START, REF, ALT,
    ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS
  ) %>%
  filter(
    (START == 4738 & REF == "C" & ALT == "A") |
      (START == 9347 & REF == "G" & ALT == "A")
  ) %>%
  mutate(MUT_FREQ = ALT_ALLELE_DEPTH / READ_DEPTH_AT_POS)

# Display the B6 rows at these sites.
alr_hap_sites %>%
  filter(STRAIN == "B6")
```

```{r}
# One panel per position.
alr_hap_sites_plot <- ggplot(
  alr_hap_sites,
  aes(x = SAMPLE, y = MUT_FREQ, color = STRAIN, shape = TISSUE)
) +
  hap_site_layers +
  facet_wrap(~START)

save_plot_pdf(alr_hap_sites_plot, "alr_hapsites_freq.pdf")
```

```{r}
# Per-sample frequency of the G>T variant at position 7777.
# NOTE(review): the strain palette uses the label "F"; presumably that is the
# strain these fvb_* objects refer to -- confirm.
fvb_hap_sites <- all_variants %>%
  select(
    SAMPLE, STRAIN, TISSUE, START, REF, ALT,
    ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS
  ) %>%
  filter(START == 7777 & REF == "G" & ALT == "T") %>%
  mutate(MUT_FREQ = ALT_ALLELE_DEPTH / READ_DEPTH_AT_POS)

# Display the B6 rows at this site.
fvb_hap_sites %>%
  filter(STRAIN == "B6")
```

```{r}
fvb_hap_sites_plot <- ggplot(
  fvb_hap_sites,
  aes(x = SAMPLE, y = MUT_FREQ, color = STRAIN, shape = TISSUE)
) +
  hap_site_layers +
  facet_wrap(~START)

save_plot_pdf(fvb_hap_sites_plot, "fvb_hapsites_freq.pdf")
```

```{r}
# For every position strictly between 5165 and 5175, add up the alt-allele
# depth of all variants recorded at that position in a sample and divide by
# the read depth there.
akr_het_site <- all_variants %>%
  select(
    SAMPLE, STRAIN, TISSUE, AGE_BIN, START,
    ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS
  ) %>%
  filter(START > 5165, START < 5175) %>%
  group_by(STRAIN, TISSUE, AGE_BIN, SAMPLE, START, READ_DEPTH_AT_POS) %>%
  # Drop the grouping so the result is a plain table; the mutate below is
  # elementwise, so the values do not depend on it.
  summarise(SUM_ALT = sum(ALT_ALLELE_DEPTH), .groups = "drop") %>%
  mutate(MUT_FREQ = SUM_ALT / READ_DEPTH_AT_POS)

akr_het_site
```

```{r}
bay_pal <- pnw_palette(name = "Bay", type = "discrete")

# Three shapes and three colours are supplied.
# NOTE(review): assumes AGE_BIN and TISSUE each have at most three levels --
# confirm against the data.
akr_het_site_plot <- ggplot(
  akr_het_site,
  aes(x = START, y = MUT_FREQ, shape = AGE_BIN)
) +
  geom_point(aes(color = TISSUE)) +
  scale_shape_manual(values = c(4, 19, 1)) +
  facet_wrap(~STRAIN, nrow = 1) +
  ylab("Mutation\n Frequency") +
  xlab("Position (bp)") +
  scale_color_manual(name = "Tissue", values = bay_pal[c(1, 5, 4)]) +
  hap_site_theme

save_plot_pdf(akr_het_site_plot, "akr_hapsites_freq.pdf")
```

```{r}
# all of the NZB haplotypes are called except for the two indels and the
# heteroplasmy at 7475 because its max freq is 0.8
all_variants %>%
  filter(STRAIN == "NZB") %>%
  mutate(MUT_FREQ = ALT_ALLELE_DEPTH / READ_DEPTH_AT_POS) %>%
  filter(MUT_FREQ > 0.9) %>%
  distinct(START)
```

```{r}
# we call all the haplosites and unfortunately also call it for B6 at ND3
all_variants %>%
  mutate(MUT_FREQ = ALT_ALLELE_DEPTH / READ_DEPTH_AT_POS) %>%
  filter(MUT_FREQ > 0.95) %>%
  distinct(STRAIN, START)
```