--- title: 'Report breakdown by barcode' author: 'ampliCan' date: '`r format(Sys.time(), "%d %B %Y")`' output: html_document: toc: true theme: paper toc_float: true number_sections: true params: alignments: !r system.file('extdata', 'results', 'alignments', 'events_filtered_shifted_normalized.csv', package = 'amplican') config_summary: !r system.file('extdata', 'results', 'config_summary.csv', package = 'amplican') unassigned_folder: !r system.file('extdata', 'results', 'alignments', 'unassigned_reads.csv', package = 'amplican') top: 5 vignette: > %\VignetteIndexEntry{example barcode_report report} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r load data, message=F, warning=FALSE, include=FALSE} library(amplican) library(ggplot2) alignments <- data.table::fread(params$alignments) data.table::setDF(alignments) config <- data.frame(data.table::fread(params$config_summary)) height <- amplican::plot_height(length(unique(config$Barcode))) ``` *** # Description *** **Read distribution plot** - plot shows number of reads assigned during read grouping **Filtered Reads** - plot shows percentage of assigned reads that have been recognized as PRIMER DIMERS or filtered based on low alignment score **Cutting rates** - plot gives overview of percentage of reads (not filtered as PRIMER DIMER) that have cut **Frameshift** - plot shows what percentage of reads that have frameshift **Frameshift overlapping** - shows what percentage of reads have frameshift counting only deletions and insertions that overlap expected cut site (should be more accurate when controls are not available) **Read heterogeneity plot** - shows what is the share of each of the unique reads in total count of all reads. The more yellow each row, the less heterogeneity in the reads, more black means reads don't repeat often and are unique **Top unassigned reads** - take a look at the alignment of most abundant forward and reverse complemented reverse reads for each barcode, if you find that there is many unassigned reads you can ivestigate here. *** # Barcode Summary *** ## Groups IDs ```{r group ids, echo=FALSE, message=F, warning=FALSE} library(knitr) groupDF <- data.frame(group = unique(config$Barcode), IDs = sapply(unique(config$Barcode), function(x) toString(config$ID[config$Barcode == x]))) kable(groupDF, row.names = FALSE) ``` ## Read distribution ```{r plot_total_reads, echo=FALSE, fig.height=height + height/4, fig.width=14, message=F, warning=FALSE} p <- ggplot(data = config, aes(x = as.factor(Barcode), y = log10(Reads + 1), order = Barcode, fill = ID)) + geom_bar(position='stack', stat='identity') + ylab('Number of reads + 1, log10 scaled') + xlab('Barcode') if (length(groupDF$group) > 20) { p <- p + theme(legend.position = 'none', legend.direction = 'horizontal', axis.text = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold')) } else { p <- p + theme(legend.position = 'top', legend.direction = 'horizontal', axis.text = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold'), legend.title = element_blank()) } p + coord_flip() ``` ## Filtered Reads ```{r plot_filter_per, echo=FALSE, fig.height=height, fig.width=14, message=F, warning=FALSE} barcodeTable <- aggregate(cbind(Reads, Reads_Filtered, PRIMER_DIMER, Low_Score, Reads_Indel, Reads_Frameshifted) ~ Barcode, data = config, sum) barcodeTable$F_percentage <- (barcodeTable$PRIMER_DIMER + barcodeTable$Low_Score) * 100/barcodeTable$Reads barcodeTable$F_percentage[is.nan(barcodeTable$F_percentage)] <- 0 ggplot(data = barcodeTable, aes(x = as.factor(Barcode), y = F_percentage, order = Barcode, fill = Barcode)) + geom_bar(stat='identity') + ylab('Percentage of filtered reads') + xlab('Barcode') + theme(axis.text = element_text(size=12), axis.title = element_text(size=14, face = 'bold'), legend.position = 'none') + ylim(0, 100) + coord_flip() ``` ## Indel rates ```{r plot mut percentage, echo=FALSE, fig.height=height,fig.width=14, message=F, warning=FALSE} barcodeTable$indel_percentage <- barcodeTable$Reads_Indel * 100/barcodeTable$Reads_Filtered barcodeTable$indel_percentage[is.nan(barcodeTable$indel_percentage)] <- 0 ggplot(data = barcodeTable, aes(x = as.factor(Barcode), y = indel_percentage, order = Barcode)) + geom_bar(stat = 'identity') + ylab('Percentage of reads (not filtered) that have indel') + theme(axis.text = element_text(size=12), axis.title = element_text(size=14, face = 'bold')) + ylim(0,100) + xlab('Barcode') + coord_flip() + geom_text(aes(x = as.factor(Barcode), y = indel_percentage, label = Reads_Indel), hjust = -1) ``` ## Frameshift ```{r plot_frameshift_per, echo=FALSE, fig.height=height, fig.width=14, message=F, warning=FALSE} barcodeTable$frameshift_percentage <- barcodeTable$Reads_Frameshifted * 100/barcodeTable$Reads_Filtered barcodeTable$frameshift_percentage[is.nan(barcodeTable$frameshift_percentage)] <- 0 ggplot(data = barcodeTable, aes(x = as.factor(Barcode), y = frameshift_percentage, order = Barcode)) + geom_bar(position = 'stack', stat = 'identity') + ylab('Percentage of reads (not filtered) that have frameshift') + xlab('Barcode') + theme(axis.text = element_text(size=12), axis.title = element_text(size=14,face = 'bold')) + ylim(0, 100) + coord_flip() + geom_text(aes(x = as.factor(Barcode), y = frameshift_percentage, label = Reads_Frameshifted), hjust = -1) ``` ## Heterogeneity of reads ```{r plot read heterogeneity, echo=FALSE, fig.height=height + 1, fig.width=14, message=F, warning=FALSE} plot_heterogeneity(alignments, config, level = 'Barcode') ``` *** ```{r plot read unassigned reads, results = "asis", echo=FALSE, message=F, warning=FALSE, comment = ''} if (!is.na(config$Forward_Reads_File) && !is.na(config$Reverse_Reads_File) && file.exists(params$unassigned_folder)) { unassigned_reads <- data.table::fread(params$unassigned_folder) data.table::setDF(unassigned_reads) cat("# Top unassigned reads \n\n*** \n\n") for (b in unique(unassigned_reads$Barcode)) { cat("## ", b, " \n\n", sep = "") b_unassiged <- unassigned_reads[unassigned_reads$Barcode == b, ] b_unassiged <- b_unassiged[order(b_unassiged$BarcodeFrequency, decreasing = TRUE), ] if (dim(b_unassiged)[1] == 0) { cat(knitr::asis_output("No unassigned reads in this barcode. That\'s great! \n\n")) } else { topN <- if (dim(b_unassiged)[1] < params$top) dim(b_unassiged)[1] else params$top cat('\n') print(knitr::kable(data.frame(Forward = paste0('P', 1:topN), Reverse = paste0('S', 1:topN), Counts = b_unassiged[1:topN, 'Total'], Frequency = b_unassiged[1:topN, 'BarcodeFrequency']))) cat("\n") cat("
", paste(amplican_print_reads(b_unassiged[1:topN, 'Forward'],
                                              b_unassiged[1:topN, 'Reverse']),
                         collapse = "\n"), "
") cat("\n\n") } } } ```