---
title: "R Notebook"
output: html_notebook
---
```{r}
library(tidyverse)
library(ggplot2)
library(ggbeeswarm)
```
Recall, the supertable file has the read depth at every position (0-indexed) and the alt allele count at each position (where a lack of mutation is given by a 0). This file does not contain any mutations present at the haplotype sites
```{r}
outdir_figures = "~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/experimental_design/figures"
outdir_files <- "~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/experimental_design/files"
supertable_file = "~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/input_files/supertable.txt"
supertable = read.table(supertable_file, header = TRUE, stringsAsFactors = FALSE)
```
```{r}
sample_somatic_mut_count = supertable %>%
select(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, REF, ALT, ALT_ALLELE_DEPTH) %>%
#to ensure that we don't double count positions in two genes
unique() %>%
select(SAMPLE, STRAIN, TISSUE, AGE_BIN, ALT_ALLELE_DEPTH) %>%
group_by(SAMPLE, STRAIN, TISSUE, AGE_BIN) %>%
summarise(RAW_SOMATIC_MUT_COUNT = sum(ALT_ALLELE_DEPTH)) %>%
mutate(X = 0)
```
Calculating the somatic mutation count
```{r}
sample_median_count = median(sample_somatic_mut_count$RAW_SOMATIC_MUT_COUNT)
sample_minimum_count = min(sample_somatic_mut_count$RAW_SOMATIC_MUT_COUNT)
sample_total_mut_count = sum(sample_somatic_mut_count$RAW_SOMATIC_MUT_COUNT)
print(sample_median_count)
print(sample_total_mut_count)
```
```{r}
sample_somatic_mut_count_fig = ggplot(sample_somatic_mut_count, aes(x = X, y = RAW_SOMATIC_MUT_COUNT)) +
geom_beeswarm() +
geom_hline(yintercept = median(sample_somatic_mut_count$RAW_SOMATIC_MUT_COUNT), color = "red", linetype = "dashed") +
annotate("text", x = 0.682, y = 53000, label = "Total: 1,171,918 somatic muts", size = 3.45) +
annotate("text", x = 0.667, y = 47000, label = "Median: 7,046 somatic muts ", size = 3.45, color = "red") +
xlab("Samples") +
ylab("Count of somatic mutations") +
scale_x_discrete(breaks = c(0, 0.1)) +
theme_bw() +
theme(axis.text = element_text(size=10),
axis.title = element_text(size=11),
#axis.text.x = element_blank(),
axis.text.y = element_text(size = 10),
strip.background= element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))
pdf(paste(outdir_figures, "/sample_som_mut_count_summary.pdf", sep = ""), width = 4, height = 3)
print(sample_somatic_mut_count_fig)
dev.off()
```
The somatic mutation count per condition
```{r}
condition_som_mut_count = sample_somatic_mut_count %>%
ungroup() %>%
select(STRAIN, TISSUE, AGE_BIN, RAW_SOMATIC_MUT_COUNT) %>%
group_by(STRAIN, TISSUE, AGE_BIN) %>%
summarise(COND_SOM_MUT_COUNT = sum(RAW_SOMATIC_MUT_COUNT))
condition_som_mut_count$STRAIN = factor(condition_som_mut_count$STRAIN, levels = c("B6", "AKR", "ALR", "FVB", "NZB"))
median = median(condition_som_mut_count$COND_SOM_MUT_COUNT)
total_cond_count = sum(condition_som_mut_count$COND_SOM_MUT_COUNT)
print(median)
print(total_cond_count)
```
```{r}
cond_som_mut_count = ggplot(condition_som_mut_count) +
geom_beeswarm(aes(x = STRAIN, y = COND_SOM_MUT_COUNT, color = STRAIN)) +
annotate("text", x = 2.2, y = 1.92e+05, label = "Total: 1.2M", size = 3.5) +
annotate("text", x = 2.65, y = 1.55e+05, label = "Median: 40,449", size = 3.5) +
xlab("Strains") +
ylab("Count of somatic mutations") +
theme_bw(base_size = 16) +
scale_color_manual(values = c("B6"= "#1d457f","AKR" = "#cc5c76", "ALR" = "#2f9e23", "FVB" = "#f57946", "NZB" = "#f7c22d")) +
scale_y_log10(breaks = trans_breaks("log10", function(x) 10^x),
labels = scales::trans_format("log10", math_format(10^.x))) +
theme(axis.text = element_text(size=11),
axis.title = element_text(size=12),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 11),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
strip.background=element_blank(),
strip.text = element_text(face = "bold"),
text = element_text(family = "sans"),
legend.position = "none")
pdf(paste(outdir_figures, "/cond_raw_som_mut_count.pdf", sep = ""), width = 2.35, height = 4)
print(cond_som_mut_count)
dev.off()
```
Writing out files:
1) sample somatic mutation count
2) condition somatic mutation count
```{r}
write.table(sample_somatic_mut_count %>% select(SAMPLE, STRAIN, TISSUE, AGE_BIN, RAW_SOMATIC_MUT_COUNT), file = paste(outdir_files,"/sample_somatic_mut_count.txt", sep = ""), sep = "\t", quote = F, row.names = F)
write.table(condition_som_mut_count %>% select(STRAIN, TISSUE, AGE_BIN, COND_SOM_MUT_COUNT), file = paste(outdir_files,"/experimental_condition_somatic_mut_count.txt", sep = ""), sep = "\t", quote = F, row.names = F)
```