---
title: "R Notebook"
output: html_notebook
---
```{r}
library(tidyverse)
library(ggplot2)
library(scales)
library(ggridges)
```
```{r}
setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/reversions/scripts/")
length_mtdna = 16299
outdir_figures = "../figures/"
outdir_files = "../files/"
#this file DOES NOT CONTAIN HAPLOTYPE SITES
supertable_file = "../../input_files/supertable.txt"
supertable = read.table(supertable_file, header=TRUE, stringsAsFactors = FALSE)
#this file contains REVERSION haplotype sites (ALT > B6 REF)
haplotype_reversions_file = "../files/corrected_reversion_counts.txt"
haplotype_reversions = read.table(haplotype_reversions_file, header = TRUE, stringsAsFactors = FALSE)
#this file contains OTHER ALT alleles at haplotype sites (not B6 allele = reversion)
other_alt_alleles_haplotype_sites_file = "../../input_files/haplotype_sites_nonreversion_muts.txt"
other_alt_alleles_haplotype_sites = read.table(other_alt_alleles_haplotype_sites_file, header = TRUE, stringsAsFactors = FALSE)
```
Calculating the reversion allele frequency
```{r}
reversion_count = haplotype_reversions %>%
#note: the reversion allele freq is the ref allele depth -- B6 allele
select(STRAIN, TISSUE, AGE_BIN, START, FLOORED_CORR_REV_ALLELE_DEPTH, CORR_READ_DEPTH) %>%
mutate(SITE_TYPE = "HAPLO_SITE") %>%
rename("COND_REV_ALLELE_DEPTH" = FLOORED_CORR_REV_ALLELE_DEPTH)
```
Calculating the other allele frequencies, including the haplotype site non-reversion alleles
1) Let's bind the supertable and non-reversion alleles at haplotype sites
```{r}
other_alt_alleles_haplotype_sites = other_alt_alleles_haplotype_sites %>%
mutate(STRAIN = recode(STRAIN, "F" = "FVB")) %>%
select(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, REF, ALT, ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS)
#information for background sites
somatic_muts_nonhaplotype_sites = supertable %>%
select(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, REF, ALT, ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS)
somatic_nonreversion = rbind(somatic_muts_nonhaplotype_sites, other_alt_alleles_haplotype_sites)
```
```{r}
rm(other_alt_alleles_haplotype_sites, supertable, somatic_muts_nonhaplotype_sites, haplotype_reversions)
```
We want to calculate the mutation count at each position (i.e. aggregate the count of all possible occuring mutations)
```{r}
somatic_mut_count = somatic_nonreversion %>%
#here we eliminate redundancy from the same start position being in two gene regions
unique() %>%
#first we aggregate the number of mutations at a site within a sample (i.e. sum the count of different mutation types)
select(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, ALT_ALLELE_DEPTH, READ_DEPTH_AT_POS) %>%
group_by(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, READ_DEPTH_AT_POS) %>%
summarise(SAMPLE_SOMATIC_MUT_COUNT = sum(ALT_ALLELE_DEPTH)) %>%
ungroup() %>%
#now we want to aggregate the read depth and the mut count in a condition
select(STRAIN, TISSUE, AGE_BIN, START, SAMPLE_SOMATIC_MUT_COUNT, READ_DEPTH_AT_POS) %>%
group_by(STRAIN, TISSUE, AGE_BIN, START) %>%
summarise(COND_SOMATIC_MUT_COUNT = sum(SAMPLE_SOMATIC_MUT_COUNT), COND_READ_DEPTH_AT_POS = sum(READ_DEPTH_AT_POS))
```
```{r}
merged_mutations = full_join(somatic_mut_count, reversion_count, by = c("STRAIN", "TISSUE", "AGE_BIN", "START"))
```
```{r}
#we have filled in the fields so that we can note 0 if there is not a somatic mutation present, 0 if there is not a reversion mutation present, and the site type if it's not a haplotype site
filtered_merged_mutations = merged_mutations %>%
#we filter out B6 since we are only interested in the conplastic strains
filter(STRAIN != "B6") %>%
mutate(COND_SOMATIC_MUT_COUNT = ifelse(is.na(COND_SOMATIC_MUT_COUNT), 0, COND_SOMATIC_MUT_COUNT),
COND_REV_ALLELE_DEPTH = ifelse(is.na(COND_REV_ALLELE_DEPTH), 0, COND_REV_ALLELE_DEPTH),
CORR_READ_DEPTH = ifelse(is.na(CORR_READ_DEPTH), COND_READ_DEPTH_AT_POS, CORR_READ_DEPTH),
SITE_TYPE = ifelse(is.na(SITE_TYPE), "NOT_HAPLO", SITE_TYPE)) %>%
select(STRAIN, TISSUE, AGE_BIN, START, SITE_TYPE, COND_SOMATIC_MUT_COUNT, COND_REV_ALLELE_DEPTH, CORR_READ_DEPTH)
```
```{r}
mut_freq = filtered_merged_mutations %>%
mutate(MUT_COUNT = COND_SOMATIC_MUT_COUNT + COND_REV_ALLELE_DEPTH) %>%
mutate(MUT_FREQ_AT_POS = MUT_COUNT / CORR_READ_DEPTH)
```
We want to calculate the % of alleles at haplotype sites that are reversion alleles
```{r}
mut_freq %>%
ungroup() %>%
select(STRAIN, SITE_TYPE, COND_REV_ALLELE_DEPTH, MUT_COUNT) %>%
filter(SITE_TYPE == "HAPLO_SITE") %>%
filter(MUT_COUNT > 0) %>%
mutate(PROP_REV = COND_REV_ALLELE_DEPTH/MUT_COUNT) %>%
select(STRAIN, PROP_REV) %>%
group_by(STRAIN) %>%
summarise(AVG = mean(PROP_REV))
```
Normalizing mutation frequency for sequencing depth so that we can compare mutation freq across ages
```{r}
normalized_mut_freq = mut_freq %>%
ungroup() %>%
select(STRAIN, TISSUE, AGE_BIN, START, SITE_TYPE, MUT_FREQ_AT_POS, CORR_READ_DEPTH) %>%
mutate(MIN_FREQ_AT_POS = 1/CORR_READ_DEPTH) %>%
group_by(STRAIN, TISSUE, START, SITE_TYPE) %>%
mutate(FLOOR_FREQ_AT_POS_BW_AGE = max(MIN_FREQ_AT_POS)) %>%
mutate(NORM_MUT_FREQ_AT_POS = ifelse(MUT_FREQ_AT_POS < FLOOR_FREQ_AT_POS_BW_AGE, 0, MUT_FREQ_AT_POS)) %>%
ungroup() %>%
select(STRAIN, TISSUE, AGE_BIN, START, SITE_TYPE, MUT_FREQ_AT_POS, NORM_MUT_FREQ_AT_POS) %>%
arrange(STRAIN, TISSUE, AGE_BIN, START)
```
We want to calculate the avg fold difference in mut freq between haplotype sites and background sites
```{r}
normalized_mut_freq %>%
select(STRAIN, SITE_TYPE, MUT_FREQ_AT_POS) %>%
group_by(STRAIN, SITE_TYPE) %>%
#we take the average across tissues
summarise(AVG = mean(MUT_FREQ_AT_POS)) %>%
pivot_wider(names_from = SITE_TYPE, values_from = AVG) %>%
mutate(FOLD_DIFF = HAPLO_SITE/NOT_HAPLO)
```
Exporting for future analyses
```{r}
setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/reversions")
outdir_files = "files"
write.table(normalized_mut_freq, file = paste(outdir_files,"/corr_NUMT_freqs_normalized_mut_freq_per_pos_w_reversions.txt", sep = ""), sep = "\t", quote = F, row.names = F)
```
Refactoring so that wildtype strain is the first in figure
```{r}
normalized_mut_freq$STRAIN = factor(normalized_mut_freq$STRAIN, level = c("AKR", "ALR", "FVB", "NZB"))
```
Statistics: wilcoxon test to compare the averages of the background and haplotype sites for NZB
We have to run this test for every age and tissue combination
```{r}
nzb_wilcoxon = normalized_mut_freq %>%
filter(STRAIN == "NZB") %>%
mutate(LABEL = paste(AGE_BIN, TISSUE, SITE_TYPE, sep = "_"))
```
```{r}
groups = list(c("OLD_Brain_NOT_HAPLO", "OLD_Brain_HAPLO_SITE"),
c("YOUNG_Brain_NOT_HAPLO", "YOUNG_Brain_HAPLO_SITE"),
c("OLD_Heart_NOT_HAPLO", "OLD_Heart_HAPLO_SITE"),
c("YOUNG_Heart_NOT_HAPLO", "YOUNG_Heart_HAPLO_SITE"),
c("OLD_Liver_NOT_HAPLO", "OLD_Liver_HAPLO_SITE"),
c("YOUNG_Liver_NOT_HAPLO", "YOUNG_Liver_HAPLO_SITE"))
```
```{r}
combos = groups %>%
set_names(map_chr(., ~ paste(., collapse = "_")))
```
Now to run the Wilcoxon Rank Sum Test
```{r}
p_values_nzb = map_df(combos, function(y) {
filter(nzb_wilcoxon, LABEL %in% y) %>%
wilcox.test(NORM_MUT_FREQ_AT_POS ~ LABEL, data = .) %>%
broom::tidy()
}, .id = "contrast") %>%
mutate(p_adj = p.adjust(p.value, method = "BH")) %>%
mutate(SIG_STATUS = ifelse(p_adj < 0.01, "SIG", "N.S.")) %>%
mutate(STRAIN = "NZB") %>%
mutate(TISSUE = case_when(grepl("Brain", contrast) ~ "Brain",
grepl("Heart", contrast) ~ "Heart",
TRUE ~ "Liver")) %>%
mutate(AGE_BIN = ifelse(grepl("OLD", contrast), "OLD", "YOUNG")) %>%
mutate(P_ADJ_LABEL = scientific(p_adj, digits = 2))
```
We create a label for the norm mut freq at each haplotype position in a given condition for all strains except NZB
```{r}
comp_haplo_site_freq = normalized_mut_freq %>%
filter(SITE_TYPE == "HAPLO_SITE") %>%
filter(STRAIN != "NZB") %>%
select(STRAIN, TISSUE, AGE_BIN, START, NORM_MUT_FREQ_AT_POS) %>%
mutate(LABEL = paste(STRAIN, START, sep = "_")) %>%
rename(HAPLO_MUT_FREQ = NORM_MUT_FREQ_AT_POS) %>%
select(STRAIN, TISSUE, AGE_BIN, LABEL, HAPLO_MUT_FREQ)
```
```{r}
FVB_hap_sites = normalized_mut_freq %>%
filter(STRAIN == "FVB") %>%
filter(SITE_TYPE == "NOT_HAPLO") %>%
left_join((comp_haplo_site_freq %>% filter(STRAIN == "FVB")), by = c("STRAIN", "TISSUE", "AGE_BIN")) %>%
pivot_wider(names_from = LABEL, values_from = HAPLO_MUT_FREQ) %>%
group_by(TISSUE, AGE_BIN) %>%
mutate(TOTAL_POS = n()) %>%
ungroup() %>%
mutate(fvb_count_7777 = ifelse(NORM_MUT_FREQ_AT_POS > FVB_7777, 1, 0), fvb_count_9460 = ifelse(NORM_MUT_FREQ_AT_POS > FVB_9460, 1, 0)) %>%
select(STRAIN, TISSUE, AGE_BIN, fvb_count_7777, fvb_count_9460, TOTAL_POS) %>%
pivot_longer(cols= fvb_count_7777:fvb_count_9460, names_to = "HAP_LABEL", values_to = "COUNT_ABOVE") %>%
group_by(STRAIN, TISSUE, AGE_BIN, TOTAL_POS, HAP_LABEL) %>%
summarise(TOTAL_COUNT_ABOVE_FREQ = sum(COUNT_ABOVE)) %>%
mutate(p_val = TOTAL_COUNT_ABOVE_FREQ/TOTAL_POS)
```
```{r}
ALR_hap_sites = normalized_mut_freq %>%
filter(STRAIN == "ALR") %>%
filter(SITE_TYPE == "NOT_HAPLO") %>%
left_join((comp_haplo_site_freq %>% filter(STRAIN == "ALR")), by = c("STRAIN", "TISSUE", "AGE_BIN")) %>%
pivot_wider(names_from = LABEL, values_from = HAPLO_MUT_FREQ) %>%
group_by(TISSUE, AGE_BIN) %>%
mutate(TOTAL_POS = n()) %>%
ungroup() %>%
mutate(alr_count_4738 = ifelse(NORM_MUT_FREQ_AT_POS > ALR_4738, 1, 0), alr_count_9347 = ifelse(NORM_MUT_FREQ_AT_POS > ALR_9347, 1, 0), alr_count_9460 = ifelse(NORM_MUT_FREQ_AT_POS > ALR_9460, 1, 0)) %>%
select(STRAIN, TISSUE, AGE_BIN, alr_count_4738, alr_count_9347, alr_count_9460, TOTAL_POS) %>%
pivot_longer(cols= alr_count_4738:alr_count_9460, names_to = "HAP_LABEL", values_to = "COUNT_ABOVE") %>%
group_by(STRAIN, TISSUE, AGE_BIN, TOTAL_POS, HAP_LABEL) %>%
summarise(TOTAL_COUNT_ABOVE_FREQ = sum(COUNT_ABOVE)) %>%
mutate(p_val = TOTAL_COUNT_ABOVE_FREQ/TOTAL_POS)
```
```{r}
AKR_hap_sites = normalized_mut_freq %>%
filter(STRAIN == "AKR") %>%
filter(SITE_TYPE == "NOT_HAPLO") %>%
left_join((comp_haplo_site_freq %>% filter(STRAIN == "AKR")), by = c("STRAIN", "TISSUE", "AGE_BIN")) %>%
pivot_wider(names_from = LABEL, values_from = HAPLO_MUT_FREQ) %>%
group_by(TISSUE, AGE_BIN) %>%
mutate(TOTAL_POS = n()) %>%
ungroup() %>%
mutate(akr_count_9460 = ifelse(NORM_MUT_FREQ_AT_POS > AKR_9460, 1, 0)) %>%
select(STRAIN, TISSUE, AGE_BIN, akr_count_9460, TOTAL_POS) %>%
pivot_longer(cols= akr_count_9460, names_to = "HAP_LABEL", values_to = "COUNT_ABOVE") %>%
group_by(STRAIN, TISSUE, AGE_BIN, TOTAL_POS, HAP_LABEL) %>%
summarise(TOTAL_COUNT_ABOVE_FREQ = sum(COUNT_ABOVE)) %>%
mutate(p_val = TOTAL_COUNT_ABOVE_FREQ/TOTAL_POS)
```
```{r}
point_background_comp = rbind(FVB_hap_sites, ALR_hap_sites, AKR_hap_sites) %>%
ungroup() %>%
select(STRAIN, TISSUE, AGE_BIN, HAP_LABEL, p_val) %>%
separate(HAP_LABEL, sep = "_", c("L_STRAIN", "COUNT","START")) %>%
select(STRAIN, TISSUE, AGE_BIN, START, p_val) %>%
mutate(p_adj = p.adjust(p_val, method = "BH")) %>%
mutate(SIG_STATUS = ifelse(p_adj < 0.01, "SIG", "N.S.")) %>%
arrange(desc(SIG_STATUS))
point_background_comp$START = as.numeric(point_background_comp$START)
```
```{r}
dist_mut_freq_df = normalized_mut_freq %>%
filter(NORM_MUT_FREQ_AT_POS > 0) %>%
#we keep HFPs in our calculations
#filter(NORM_MUT_FREQ_AT_POS < 1e-3) %>%
mutate(NEW_SITE_LABEL = ifelse(AGE_BIN == "OLD",
ifelse(SITE_TYPE == "HAPLO_SITE", "Old Haplotype Site", "Old Non-Haplotype Site"),
ifelse(SITE_TYPE == "HAPLO_SITE", "Young Haplotype Site", "Young Non-Haplotype Site"))) %>%
mutate(GEOM_TYPE = ifelse(SITE_TYPE == "HAPLO_SITE", ifelse(STRAIN == "NZB", "DIST", "POINT"), "DIST"))
plotting_dist_df = dist_mut_freq_df %>%
mutate(AGE_LABEL = recode(AGE_BIN, "YOUNG" = "Young", "OLD" = "Old"))
```
```{r}
library(stringr)
plotting_point_labels = point_background_comp %>%
filter(SIG_STATUS == "SIG") %>%
mutate(X_POS = -2.5) %>%
#mutate(X_POS = ifelse(STRAIN == "ALR", -2.5, X_POS)) %>%
#mutate(X_POS = ifelse(STRAIN == "ALR" & TISSUE == "Brain", -4, X_POS)) %>%
select(STRAIN, TISSUE, AGE_BIN, START, SIG_STATUS, X_POS) %>%
group_by(STRAIN, TISSUE, AGE_BIN, SIG_STATUS, X_POS) %>%
#super cool move -- paste entries together from a group into one label
summarise(LABEL = str_c(START, collapse=",")) %>%
mutate(X_POS = ifelse(grepl(",", LABEL), -2.65, X_POS ))
plotting_asteriks = point_background_comp %>%
filter(SIG_STATUS == "SIG") %>%
left_join(plotting_dist_df, by = c("STRAIN", "TISSUE", "AGE_BIN", "START")) %>%
select(STRAIN, TISSUE, AGE_BIN, START, p_adj, NORM_MUT_FREQ_AT_POS) %>%
mutate(P_VAL_LABEL = scientific(p_adj, digits = 2)) %>%
mutate(X_POS = ifelse(log10(NORM_MUT_FREQ_AT_POS) < -3.25, -3.5, -2.85)) %>%
mutate(X_POS = ifelse((STRAIN == "ALR" & TISSUE == "Brain" & AGE_BIN == "YOUNG"), -4, X_POS)) %>%
mutate(P_VAL_LABEL = ifelse((STRAIN == "ALR" & TISSUE == "Brain" & AGE_BIN == "OLD" & START == 9347), "", P_VAL_LABEL)) %>%
mutate(X_POS = ifelse((STRAIN == "ALR" & TISSUE == "Brain" & AGE_BIN == "OLD" & START == 4738), -2.8, X_POS)) %>%
mutate(P_VAL_LABEL = ifelse((STRAIN == "ALR" & TISSUE == "Brain" & AGE_BIN == "OLD" & START == 4738), paste("**", P_VAL_LABEL, sep = ""), P_VAL_LABEL)) %>%
mutate(X_POS = ifelse((STRAIN == "ALR" & TISSUE == "Heart" & AGE_BIN == "OLD" & START == 4738), -2.8, X_POS)) %>%
mutate(P_VAL_LABEL = ifelse((STRAIN == "ALR" & TISSUE == "Heart" & AGE_BIN == "OLD" & START == 4738), paste("**", P_VAL_LABEL, sep = ""), P_VAL_LABEL)) %>%
mutate(P_VAL_LABEL = ifelse((STRAIN == "ALR" & TISSUE == "Heart" & AGE_BIN == "OLD" & START == 9347), "", P_VAL_LABEL))
```
Plotting now!
```{r}
dist = ggplot(plotting_dist_df)
dist = dist +
geom_density_ridges(data = subset(plotting_dist_df, plotting_dist_df$GEOM_TYPE == "DIST"), aes(x = log10(NORM_MUT_FREQ_AT_POS), y = AGE_LABEL, fill = NEW_SITE_LABEL), alpha=0.5,color='black',scale=3,size=0.01) +
geom_point(data = subset(plotting_dist_df, plotting_dist_df$GEOM_TYPE == "POINT"), aes(x = log10(NORM_MUT_FREQ_AT_POS), y = AGE_LABEL, color = NEW_SITE_LABEL)) +
geom_text(data = plotting_asteriks, aes(x = X_POS, y = ifelse(AGE_BIN == "OLD", 1.55, 2.55), label = P_VAL_LABEL), size = 2.75) +
#geom_text(data = plotting_point_labels, aes(x = X_POS , y = ifelse(AGE_BIN == "OLD", 1.5, 2.5), label = LABEL, size = 3), size = 2) +
geom_text(data = p_values_nzb, aes(x = ifelse((TISSUE == "Brain" & AGE_BIN == "YOUNG"), -2.75 ,-3), y = ifelse(AGE_BIN == "OLD", 1.55, 2.55), label = P_ADJ_LABEL), size = 2.75, inherit.aes = FALSE) +
scale_fill_manual(name = "Age", values = c("#018571", "gray60", "#80CDC1", "gray80")) +
scale_color_manual(values = c("#018571", "#80CDC1"), guide = "none") +
xlab("Mutation Frequency [log10]") +
ylab("Age and Site Type") +
theme_bw(base_size = 16) +
facet_grid(STRAIN ~ TISSUE) +
theme(panel.grid=element_blank(),
text = element_text(family = "sans"),
panel.border = element_blank(),
strip.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
strip.text.x = element_text(size = 11),
strip.text.y = element_text(size = 11),
axis.text.y=element_text(size = 10),
axis.text.x =element_text(size = 10),
legend.position = "none") +
coord_cartesian(xlim=c(-5.25,-2.5))
print(dist)
pdf(paste(outdir_figures,"/corr_NUMT_freqs_dist.pdf",sep=""),width=8,height=4)
print(dist)
dev.off()
pdf(paste(outdir_figures,"/leg_corr_NUMT_freqs_dist.pdf",sep=""),width=12,height=4)
print(dist + theme(legend.position = "bottom"))
dev.off()
```
Plotting the mutation freq per position for young and old, where we also highlight the location of the NUMT region
```{r}
young_muts = normalized_mut_freq %>%
filter(AGE_BIN == "YOUNG") %>%
filter(NORM_MUT_FREQ_AT_POS > 0)
y_seg_pos = min(young_muts$NORM_MUT_FREQ_AT_POS)
young_mut_freq = ggplot(young_muts)
young_mut_freq = young_mut_freq +
geom_point(data = subset(young_muts, young_muts$SITE_TYPE == "NOT_HAPLO") , aes(x = START, y = NORM_MUT_FREQ_AT_POS, color = SITE_TYPE, size = NORM_MUT_FREQ_AT_POS), alpha = 0.75) +
#plotted this way so that haplo sites will be in front of other sites in the image
geom_point(data = subset(young_muts, young_muts$SITE_TYPE == "HAPLO_SITE") , aes(x = START, y = NORM_MUT_FREQ_AT_POS, color = SITE_TYPE, size = NORM_MUT_FREQ_AT_POS), alpha = 0.75) +
geom_segment(x = 6394, xend = 11042, y = log10(y_seg_pos) , yend = log10(y_seg_pos)) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", math_format(10^.x))) +
xlab("Position in the mt-genome (bp)") +
ylab("Mutation Frequency at Position") +
scale_color_manual(name = "Site Type", labels = c("Haplotype Site", "Other"), values = c("#018571", "gray80")) +
facet_grid(STRAIN ~ TISSUE) +
theme_bw(base_size = 16) +
theme(strip.background = element_blank(),
text = element_text(family = "sans"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
strip.text.x = element_text(size = 11),
strip.text.y = element_text(size = 11),
axis.text.y=element_text(size = 10),
axis.text.x =element_text(size = 10),
legend.position = "none")
png(paste(outdir_figures,"/corr_NUMT_freqs_young_mut_freq_w_haplotypes.png",sep=""),width=7,height=5,unit='in',res=800)
print(young_mut_freq)
dev.off()
```
```{r}
old_muts = normalized_mut_freq %>%
filter(AGE_BIN == "OLD") %>%
filter(NORM_MUT_FREQ_AT_POS > 0)
y_seg_pos = min(old_muts$NORM_MUT_FREQ_AT_POS)
old_mut_freq = ggplot(old_muts)
old_mut_freq = old_mut_freq +
geom_point(data = subset(old_muts, old_muts$SITE_TYPE == "NOT_HAPLO") , aes(x = START, y = NORM_MUT_FREQ_AT_POS, color = SITE_TYPE, size = NORM_MUT_FREQ_AT_POS), alpha = 0.75) +
geom_point(data = subset(old_muts, old_muts$SITE_TYPE == "HAPLO_SITE") , aes(x = START, y = NORM_MUT_FREQ_AT_POS, color = SITE_TYPE, size = NORM_MUT_FREQ_AT_POS), alpha = 0.75) +
geom_segment(x = 6394, xend = 11042, y = log10(y_seg_pos) , yend = log10(y_seg_pos)) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", math_format(10^.x))) +
xlab("Position in the mt-genome (bp)") +
ylab("Mutation Frequency at Position") +
scale_color_manual(name = "Site Type", labels = c("Haplotype Site", "Other"), values = c("#018571", "gray80")) +
facet_grid(STRAIN ~ TISSUE) +
theme_bw(base_size = 16) +
theme(strip.background = element_blank(),
text = element_text(family = "sans"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
strip.text.x = element_text(size = 11),
strip.text.y = element_text(size = 11),
axis.text.y=element_text(size = 10),
axis.text.x =element_text(size = 10),
legend.position = "none")
png(paste(outdir_figures,"/corr_NUMT_freqs_old_mut_freq_w_haplotypes.png",sep=""),width=7,height=5,unit='in',res=800)
print(old_mut_freq)
dev.off()
```