---
title: "R Notebook"
output: html_notebook
---
```{r}
library(tidyverse)
library(ggplot2)
library(gridExtra)
```
```{r}
outdir_figures <- "~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/experimental_design/figures"
supertable_file <-"~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/input_files/supertable.txt"
supertable <- read.table(supertable_file, header=TRUE, stringsAsFactors = FALSE)
coordinates_file <- "/Users/isabelserrano/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/input_files/Mouse_Mt_Genome_Coordinates"
coordinates <- read.table(coordinates_file, stringsAsFactors = FALSE, sep = "\t")
```
Creating our map of the linear mt-genome
```{r}
#adding the D-Loop coordinates to our file
coordinates[nrow(coordinates) + 1, ] <- c("D-Loop", 15422, 16299)
#adding column names to dataframe
colnames(coordinates) <- c("GENE", "START", "END")
coordinates$START <- as.numeric(coordinates$START)
coordinates$END <- as.numeric(coordinates$END)
#so things don't get wonky if the coordinates are read as strings -- also we convert our coordinates to be 0-indexed
coordinates$START <- as.numeric(coordinates$START) - 1
coordinates$END <- as.numeric(coordinates$END) - 1
coordinates = coordinates %>%
mutate(TYPE = case_when(grepl("mt-R", GENE) ~ "rRNA",
grepl("mt-T", GENE) ~ "tRNA",
GENE == "D-Loop" ~ "D-Loop",
GENE == "mt-OL" ~ "OriL",
TRUE ~ "Protein"))
```
```{r}
num_segs = length(coordinates$GENE)
#important part here is to include the very last coordinate on the mt-genome map otherwise we miss our "last x coord" at the end of our map
x = c(coordinates$START, 16299)
xs = c(x[1:num_segs], x[1:num_segs], x[-1], x[-1], x[1:num_segs])
ys = c(rep(0,num_segs), rep(1,num_segs), rep(1,num_segs), rep(0,num_segs), rep(0,num_segs))
idx = rep(seq(1,num_segs),5)
type = rep(coordinates$TYPE, 5)
mt_genome_map = data.frame(x_pos = xs, y = ys, idx = idx, type = type) %>%
arrange(idx)
#alternating the thickness of the protein regions to match the circular mt-genome map
mt_genome_map = mt_genome_map %>%
mutate(y = ifelse(type == "Protein", ifelse(y == 0, -0.25, 1.25), y))
```
Plotting the linear map of the mt-genome
```{r}
library(RColorBrewer)
lake_pal <- pnw_palette(name="Lake", type="discrete")
spectral = brewer.pal(11,"Spectral")
PRGn = brewer.pal(11,"PRGn")
set1 = brewer.pal(3,"Set1")
PuRd = brewer.pal(11,"PuRd")
Blues = brewer.pal(9, "Blues")
colors = c(Blues[9], set1[1], lake_pal[6], PuRd[6],PRGn[4])
```
```{r}
linear_map = ggplot(mt_genome_map)
linear_map = linear_map +
geom_polygon(aes(x=x_pos, y=y, group=idx, fill=type)) +
theme_bw(base_size=16) +
theme(legend.position="None",
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
panel.grid=element_blank()) +
scale_y_continuous("") +
scale_x_continuous("Position") +
scale_fill_manual(name = "Genomic Region", values = colors) +
theme(strip.background = element_blank(),
strip.text=element_blank(),
text = element_text(family = "sans"),
panel.border = element_blank(),
axis.title.x = element_text(size = 14),
legend.position = "none")
print(linear_map)
```
Now plotting the mutation frequency at each position for each condition -- but make it gray so it all blends in
Note: We filter OUT high frequency positions > 1e-3
```{r}
mut_freq = supertable %>%
select(START, CONDITION_MUT_FREQ_AT_POS) %>%
filter(CONDITION_MUT_FREQ_AT_POS < 1e-3) %>%
unique() %>%
filter(CONDITION_MUT_FREQ_AT_POS > 0)
```
Plotting!
```{r}
mut_freq_across_genome = ggplot(mut_freq)
mut_freq_across_genome = mut_freq_across_genome +
geom_point(aes(x = START, y = CONDITION_MUT_FREQ_AT_POS), color = "gray", size = 0.3) +
ylab("Mutation Frequency \n at Position") +
xlab("") +
scale_y_continuous(labels = scientific_format(digits = 2), breaks = seq(min(mut_freq$CONDITION_MUT_FREQ_AT_POS), 0.00125, by = 2.5e-4)) +
#ylim(min(dot_plot_df$CONDITION_MUT_FREQ_AT_POS), 0.001) +
theme_classic(base_size = 16) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
legend.position = "None")
png(paste(outdir_figures,"/wo_outliers_mut_freq_across_genome.png",sep=""),width=11.5,height=2, unit='in',res=800)
print(mut_freq_across_genome)
dev.off()
```
Without outliers (main figure)
```{r}
#margin is top, r, b, l
gA <- ggplotGrob(mut_freq_across_genome+theme(plot.margin = unit(c(1,1,0,1), "cm")))
gB <- ggplotGrob(linear_map+theme(plot.margin = unit(c(-.5,1,.5,1), "cm")))
maxWidth = grid::unit.pmax(gA$widths[2:5], gB$widths[2:5])
gA$widths[2:5] <- as.list(maxWidth)
gB$widths[2:5] <- as.list(maxWidth)
png(paste(outdir_figures,"/wo_outliers_mut_freq_across_genome_w_map.png",sep=""),width=11.5,height=3, unit='in',res=800)
grid.arrange(gA,gB,heights=c(3,1))
dev.off()
```