# Walk-through script for the qckitfastq quality-control functions.
# Each `## ----` header marks one chunk; most chunks follow the same pattern:
# compute a metric, preview its first rows as a styled table, then plot it.

## ---- setup, include=FALSE-----------------------------------------------
library(knitr)
library(kableExtra)

## ---- comparison_table, include=FALSE,eval=FALSE-------------------------
# Disabled chunk (eval=FALSE): draft feature-comparison table, kept for
# reference.
# qckitfastq <- c("yes","yes","yes+","yes")
# seqTools <- c("no","yes","yes","yes")
# ShortRead <- c("no","no","no","yes")
# FASTQC <- c("yes","yes*","yes*","yes*")
# metrics <- data.frame(qckitfastq,seqTools,ShortRead,FASTQC)
# rownames(metrics) <- c("Read Length Distribution",
#   "Per Base Read Quality", "Nucleotide Read Content", "GC Content")
# kable(metrics)
# # need to do per read sequence quality
# # + indicates that the program...
# # Here, '*' indicates that the program truncates the file or computes on
# # only the first x samples

## ---- loading_file-------------------------------------------------------
library(qckitfastq)

# Example FASTQ file shipped in the package's extdata directory.
infile <- system.file(
  "extdata", "10^5_reads_test.fq.gz",
  package = "qckitfastq"
)
# Some functions below take this seqTools::fastqq() result (`fseq`),
# others take the file path (`infile`) directly.
fseq <- seqTools::fastqq(infile)

## ---- read_length--------------------------------------------------------
read_len <- read_length(fseq)
kable(head(read_len)) %>%
  kable_styling()
plot_read_length(read_len)

## ---- per_base_sequence_quality------------------------------------------
bs <- per_base_quality(infile)
kable(head(bs)) %>%
  kable_styling()
plot_per_base_quality(bs)

## ---- per_read_quality---------------------------------------------------
prq <- per_read_quality(infile)
kable(head(prq)) %>%
  kable_styling()
plot_per_read_quality(prq)

## ---- gc_content---------------------------------------------------------
gc_df <- GC_content(infile)
kable(head(gc_df)) %>%
  kable_styling()
plot_GC_content(gc_df)

## ---- nucleotide_read_content--------------------------------------------
# Content for a single base ("A") first, then the full read_content() result.
scA <- read_base_content(fseq, content = "A")
kable(head(scA)) %>%
  kable_styling()

rc <- read_content(fseq)
kable(head(rc)) %>%
  kable_styling()
plot_read_content(rc)

## ---- kmer_count---------------------------------------------------------
km <- kmer_count(infile, k = 6)
kable(head(km)) %>%
  kable_styling()

## ---- overrep_reads------------------------------------------------------
# NOTE: the result is stored under the same name as the function. Later calls
# to overrep_reads() still resolve, because R skips non-function bindings when
# looking up a name in call position.
overrep_reads <- overrep_reads(infile)
kable(head(overrep_reads, n = 5)) %>%
  kable_styling()
plot_overrep_reads(overrep_reads)

## ---- overrep_kmer-------------------------------------------------------
# The second positional argument (7) is presumably the k-mer length;
# confirm against the overrep_kmer() documentation.
overkm <- overrep_kmer(infile, 7)
kable(head(overkm, n = 10)) %>%
  kable_styling()
plot_overrep_kmer(overkm)

## ---- adapter_content----------------------------------------------------
# The adapter analysis is skipped on Windows.
# Each output-producing step is its own top-level statement: inside a single
# braced block only the value of the last expression is auto-printed, so a
# table built in the middle of the block would be silently discarded.
not_windows <- .Platform$OS.type != "windows"
if (not_windows) {
  infile2 <- system.file("extdata", "test.fq.gz", package = "qckitfastq")
  ac_sorted <- adapter_content(infile2)
}
if (not_windows) {
  kable(head(ac_sorted)) %>%
    kable_styling()
}
if (not_windows) {
  plot_adapter_content(ac_sorted)
}

## ---- eval=FALSE, include=FALSE------------------------------------------
# Disabled chunk (eval=FALSE): benchmarking notes, kept for reference.
# ### Benchmarking
#
# #To demonstrate the utility of our functions on large datasets...
# #(need to benchmark against ShortRead)
# #library(seqTools)
# #library(ShortRead)
# #library(rbenchmark)
# #sampler <- FastqSampler('E-MTAB-1147/fastq/ERR127302_1.fastq.gz', 20000)