--- title: "R Notebook" output: html_notebook --- ```{r} library(tidyverse) library(ggplot2) ``` ```{r} setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/checking_NUMT") outdir_figures = "figures/" outdir_files = "files/" read_depth_file = "files/junction_conservative_est_merged_read_depth.txt" read_depth = read.table(read_depth_file, header = FALSE, stringsAsFactors = FALSE) #this file contains the read depth in the NUMT region supertable_file = "../input_files/supertable.txt" supertable = read.table(supertable_file, header = TRUE, stringsAsFactors = FALSE) ``` ```{r} #the amount of reads mapping to the region in chrM given that we masked the NUMT average_depth_chrM_NUMT_region = supertable %>% select(SAMPLE, STRAIN, TISSUE, AGE_BIN, START, READ_DEPTH_AT_POS) %>% unique() %>% select(STRAIN, TISSUE, AGE_BIN, START, READ_DEPTH_AT_POS) %>% group_by(STRAIN, TISSUE, AGE_BIN, START) %>% summarise(COND_READ_DEPTH_AT_POS = sum(READ_DEPTH_AT_POS)) %>% #filtering for just the NUMT region filter(START >= 6394 & START <= 11042) %>% ungroup() %>% #calculating the average read depth in the chrM NUMT region select(STRAIN, TISSUE, AGE_BIN, COND_READ_DEPTH_AT_POS) %>% group_by(STRAIN, TISSUE, AGE_BIN) %>% summarise(AVG_READ_DEPTH = mean(COND_READ_DEPTH_AT_POS)) ``` ```{r} rm(supertable) ``` ```{r} colnames(read_depth) = c("CHRM", "START", "DEPTH", "FILENAME") read_depth = read_depth %>% separate(FILENAME, c("STRAIN", "AGE", "TISSUE"), sep = "_", extra = "drop", fill = "right") ``` ```{r} #our read depth for mapping to the NUMT and chrM #here we did not enforce strict filters but looked at the read depth in junction regions read_depth = read_depth %>% mutate(STRAIN = ifelse(STRAIN == "F", "FVB", STRAIN)) %>% mutate(AGE_BIN = ifelse(grepl("O", AGE), "OLD", "YOUNG")) %>% select(STRAIN, TISSUE, AGE_BIN, CHRM, START, DEPTH) %>% group_by(STRAIN, TISSUE, AGE_BIN, CHRM, START) %>% summarise(COND_DEPTH = sum(DEPTH)) ``` ```{r} chr1_region_labeler = function(position) { start_chrm1_NUMT = 24611535 end_chrm1_NUMT = 24616184 mid_range_chrm1_NUMT_start = 24611535 + 2320 mid_range_chrm1_NUMT_end = 24611535 + 2330 #recall that the NUMT is in the - orientation on chr1 if(position < start_chrm1_NUMT & position >= (start_chrm1_NUMT-10)) { label = "JUNCTION_AFTER"} else if (position > end_chrm1_NUMT & position <= (end_chrm1_NUMT+10)) { label = "JUNCTION_BEFORE"} else if (position >= mid_range_chrm1_NUMT_start & position < mid_range_chrm1_NUMT_end) { label = "MID_RANGE_NUMT"} else if (position <= mid_range_chrm1_NUMT_start & position >= start_chrm1_NUMT) { label = "BEFORE_MID_NUMT"} else { label = "AFTER_MID_NUMT"} return(label) } ``` ```{r} chrM_region_labeler = function(position) { start_chrmM_region = 6394 end_chrmM_region = 11042 mid_range_chrmM_region_start = 6394 + 2320 mid_range_chrmM_region_end = 6394 + 2330 if(position < start_chrmM_region & position >= (start_chrmM_region-10)) { label = "JUNCTION_BEFORE"} else if (position > end_chrmM_region & position <= (end_chrmM_region+10)) { label = "JUNCTION_AFTER"} else if (position >= mid_range_chrmM_region_start & position < mid_range_chrmM_region_end) { label = "MID_RANGE_NUMT"} else if (position <= mid_range_chrmM_region_start & position >= start_chrmM_region) { label = "BEFORE_MID_NUMT"} else { label = "AFTER_MID_NUMT"} return(label) } ``` Labeling the regions for our chr1 ```{r} start_chrm1_NUMT = 24611535 end_chrm1_NUMT = 24616184 chr1_binned_read_depth = read_depth %>% filter(CHRM == "chr1") %>% #filtering info for the junction before and after #and the NUMT region itself filter(START > start_chrm1_NUMT-10) %>% filter(START < end_chrm1_NUMT+10) %>% rowwise() %>% mutate(REGION = chr1_region_labeler(START)) %>% #calculating the average depth in the region %>% select(STRAIN, TISSUE, AGE_BIN, CHRM, COND_DEPTH, REGION) %>% group_by(STRAIN, TISSUE, AGE_BIN, CHRM, REGION) %>% summarise(AVG_DEPTH_PER_REGION = mean(COND_DEPTH)) ``` Labeling the regions for our chrM ```{r} start_chrmM_region = 6394 end_chrmM_region = 11042 chrM_binned_read_depth = read_depth %>% filter(CHRM == "chrM") %>% #filtering info for the junction before and after #and the NUMT region itself filter(START > start_chrmM_region-10) %>% filter(START < end_chrmM_region+10) %>% rowwise() %>% mutate(REGION = chrM_region_labeler(START)) %>% #calculating the average depth in the region %>% select(STRAIN, TISSUE, AGE_BIN, CHRM, COND_DEPTH, REGION) %>% group_by(STRAIN, TISSUE, AGE_BIN, CHRM, REGION) %>% summarise(AVG_DEPTH_PER_REGION = mean(COND_DEPTH)) ``` Now let's concat our info for the chrM and chr1 region so that we can do all our calculations in one df ```{r} merged_info = rbind(chrM_binned_read_depth, chr1_binned_read_depth) ``` ```{r} rm(chrM_binned_read_depth, chr1_binned_read_depth) ``` ```{r} cont_est_df = left_join(merged_info, average_depth_chrM_NUMT_region, by = c("STRAIN", "TISSUE", "AGE_BIN")) ``` ```{r} output_df = cont_est_df %>% mutate(CONT_EST = AVG_DEPTH_PER_REGION/AVG_READ_DEPTH) %>% select(STRAIN, TISSUE, AGE_BIN, CHRM, REGION, CONT_EST) %>% pivot_wider(names_from = REGION, values_from = CONT_EST) ``` ```{r} setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/checking_NUMT") write.table(output_df, file = paste(outdir_files,"cons_junction_cont_est.txt", sep = ""), sep = "\t", quote = F, row.names = T) ``` Plotting the ratio of reads mapping before the NUMT and to the mid range of the NUMT ```{r} plotting_df = output_df %>% mutate(X_LABEL = paste(STRAIN,"_",TISSUE,"_",AGE_BIN)) ``` ```{r} setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/checking_NUMT") before_NUMT_comparison = ggplot(plotting_df, aes(x = X_LABEL, y = JUNCTION_BEFORE, color = CHRM)) + scale_y_log10() + geom_point() + theme_bw() + ylab("Ratio of Reads Mapped [log10]\n (Junction Before: chrM)") + xlab("Condition") + scale_color_manual(name = "Chromosome", values = c("cornflowerblue", "black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9), text = element_text(family = "sans"), axis.title.x = element_text(size = 10), axis.title.y = element_text(size = 10), axis.text.y=element_text(size = 9), legend.position = "None") pdf(paste(outdir_figures,"before_NUMT_junction.pdf",sep=""),width=5,height=4.25) print(before_NUMT_comparison) dev.off() pdf(paste(outdir_figures,"leg_before_NUMT_junction.pdf",sep=""),width=5,height=4.5) print(before_NUMT_comparison + guides(color = guide_legend(override.aes = list(size = 3))) + theme(legend.position = "bottom", legend.text = element_text(size = 12))) dev.off() ``` ```{r} setwd("~/Documents/Science/Analyses/Conplastic_Strains/files_and_analyses/checking_NUMT") after_NUMT_comparison = ggplot(plotting_df, aes(x = X_LABEL, y = JUNCTION_AFTER, color = CHRM)) + scale_y_log10() + geom_point() + theme_bw() + xlab("Condition") + ylab("Ratio of Reads Mapped [log10]\n (Junction After: Midpoint of NUMT)") + scale_color_manual(name = "Chromosome", values = c("cornflowerblue", "black")) + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9), text = element_text(family = "sans"), axis.title.x = element_text(size = 10), axis.title.y = element_text(size = 10), axis.text.y=element_text(size = 9), legend.position = "None") pdf(paste(outdir_figures,"after_NUMT_junction.pdf",sep=""),width=5,height=4.25) print(after_NUMT_comparison) dev.off() ```