## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() options(width=80, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), tidy.opts=list(width.cutoff=80), tidy=TRUE) ## ----setup, echo=FALSE, message=FALSE, warning=FALSE-------------------------- suppressPackageStartupMessages({ library(systemPipeR) library(BiocParallel) library(Biostrings) library(Rsamtools) library(GenomicRanges) library(ggplot2) library(GenomicAlignments) library(ShortRead) library(ape) library(batchtools) library(magrittr) }) ## ----install, eval=FALSE------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager") # BiocManager::install("systemPipeR") # BiocManager::install("systemPipeRdata") ## ----documentation, eval=FALSE------------------------------------------------ # library("systemPipeR") # Loads the package # library(help="systemPipeR") # Lists package info # vignette("systemPipeR") # Opens vignette ## ----genRna_workflow, eval=FALSE---------------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="rnaseq") # setwd("rnaseq") ## ----genRna_npkg, eval=FALSE-------------------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow=NULL, package_repo = "systemPipeR/SPriboseq", ref = "master", subdir = NULL) ## ----genRna_pkg_branch, eval=FALSE-------------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow=NULL, package_repo = "systemPipeR/SPrnaseq", ref = "singleMachine", subdir = NULL) ## ----targetsSE, eval=TRUE----------------------------------------------------- library(systemPipeR) targetspath <- system.file("extdata", "targets.txt", package="systemPipeR") read.delim(targetspath, comment.char = "#")[1:4,] ## ----targetsPE, eval=TRUE----------------------------------------------------- targetspath <- system.file("extdata", "targetsPE.txt", package="systemPipeR") read.delim(targetspath, comment.char = "#")[1:2,1:6] ## ----comment_lines, echo=TRUE------------------------------------------------- readLines(targetspath)[1:4] ## ----targetscomp, eval=TRUE--------------------------------------------------- readComp(file=targetspath, format="vector", delim="-") ## ----CWL_structure, echo = FALSE, eval=FALSE---------------------------------- # hisat2.cwl <- system.file("extdata", "cwl/hisat2/hisat2-se/hisat2-mapping-se.cwl", package="systemPipeR") # yaml::read_yaml(hisat2.cwl) ## ----yaml_structure, echo = FALSE, eval=FALSE--------------------------------- # hisat2.yml <- system.file("extdata", "cwl/hisat2/hisat2-se/hisat2-mapping-se.yml", package="systemPipeR") # yaml::read_yaml(hisat2.yml) ## ----SYSargs2_structure, eval=TRUE-------------------------------------------- library(systemPipeR) targets <- system.file("extdata", "targets.txt", package="systemPipeR") dir_path <- system.file("extdata/cwl/hisat2/hisat2-se", package="systemPipeR") WF <- loadWF(targets=targets, wf_file="hisat2-mapping-se.cwl", input_file="hisat2-mapping-se.yml", dir_path=dir_path) WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) ## ----names_WF, eval=TRUE------------------------------------------------------ names(WF) ## ----cmdlist, eval=TRUE------------------------------------------------------- cmdlist(WF)[1] ## ----output_WF, eval=TRUE----------------------------------------------------- output(WF)[1] modules(WF) targets(WF)[1] targets.as.df(targets(WF))[1:4,1:4] output(WF)[1] cwlfiles(WF) inputvars(WF) ## ----table_tools, echo=FALSE, message=FALSE----------------------------------- library(magrittr) # library(kableExtra) SPR_software <- system.file("extdata", "SPR_software.csv", package="systemPipeR") #SPR_software <- "SPR_software.csv" software <- read.delim(SPR_software, sep=",", comment.char = "#") colors <- colorRampPalette((c("darkseagreen", "indianred1")))(length(unique(software$Category))) id <- as.numeric(c((unique(software$Category)))) software %>% dplyr::mutate(Step = kableExtra::cell_spec(Step, color = "white", bold = TRUE, background = factor(Category, id, colors) )) %>% dplyr::select(Tool, Description, Step) %>% dplyr::arrange(Tool) %>% kableExtra::kable(escape = FALSE, align = "c", col.names = c("Tool Name", "Description", "Step")) %>% kableExtra::kable_styling(c("striped", "hover", "condensed"), full_width = TRUE) %>% kableExtra::scroll_box(width = "80%", height = "500px") ## ----test_tool_path, eval=FALSE----------------------------------------------- # tryCL(command="grep") ## ----param_structure, eval=TRUE----------------------------------------------- parampath <- system.file("extdata", "tophat.param", package="systemPipeR") read.delim(parampath, comment.char = "#") ## ----param_import, eval=TRUE-------------------------------------------------- targetspath <- system.file("extdata", "targets.txt", package="systemPipeR") args <- suppressWarnings(systemArgs(sysma=parampath, mytargets=targetspath)) args ## ----sysarg_access, eval=TRUE------------------------------------------------- names(args) ## ----sysarg_access2, eval=TRUE------------------------------------------------ sysargs(args)[1] modules(args) cores(args) outpaths(args)[1] ## ----sysarg_json, eval=TRUE--------------------------------------------------- systemArgs(sysma=parampath, mytargets=targetspath, type="json") ## ----workflow, eval=FALSE----------------------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="rnaseq") # setwd("rnaseq") ## ----test_software, eval=FALSE------------------------------------------------ # tryCL(command="hisat2") ## "All set up, proceed!" ## ----initwf, eval=FALSE------------------------------------------------------- # getwd() ## rnaseq # script <- "systemPipeRNAseq.Rmd" # targetspath <- "targets.txt" # sysargslist <- initWF(script = script, targets = targetspath) ## ----initwf_tempdir, eval=FALSE----------------------------------------------- # library(systemPipeRdata) # script <- system.file("extdata/workflows/rnaseq", "systemPipeRNAseq.Rmd", package="systemPipeRdata") # targets <- system.file("extdata", "targets.txt", package="systemPipeR") # dir_path <- tempdir() # SYSconfig <- initProject(projPath=dir_path, targets=targets, script=script, overwrite = TRUE) # sysargslist_temp <- initWF(sysconfig ="SYSconfig.yml") ## ----config, eval=FALSE------------------------------------------------------- # sysargslist <- configWF(x=sysargslist, input_steps = "1:3") # sysargslist <- runWF(sysargslist = sysargslist, steps = "1:2") ## ----pipes, eval=FALSE-------------------------------------------------------- # library(systemPipeR) # sysargslist <- initWF(script ="systemPipeRNAseq.Rmd", overwrite = TRUE) %>% # configWF(input_steps = "1:3") %>% # runWF(steps = "1:2") ## ----closeR, eval=FALSE------------------------------------------------------- # q("no") # closes R session on head node ## ----r_environment, eval=FALSE------------------------------------------------ # system("hostname") # should return name of a compute node starting with i or c # getwd() # checks current working directory of R session # dir() # returns content of current working directory ## ----clusterRun, eval=FALSE--------------------------------------------------- # library(batchtools) # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # reg <- clusterRun(args, FUN = runCommandline, more.args = list(args=args, make_bam=TRUE, dir=FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs=18, runid="01", resourceList=resources) # getStatus(reg=reg) # waitForJobs(reg=reg) ## ----load_package, eval=FALSE------------------------------------------------- # library(systemPipeR) # library(systemPipeRdata) # genWorkenvir(workflow="rnaseq", mydirname=NULL) # setwd("rnaseq") ## ----construct_SYSargs2_trim-se, echo = FALSE, eval=FALSE--------------------- # targets <- system.file("extdata", "targets.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/preprocessReads/trim-se", package="systemPipeR") # trim <- loadWorkflow(targets=targets, wf_file="trim-se.cwl", input_file="trim-se.yml", dir_path=dir_path) # trim <- renderWF(trim, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) # trim ## ----construct_SYSargs2_trim-pe, eval=FALSE----------------------------------- # targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/preprocessReads/trim-pe", package="systemPipeR") # trim <- loadWorkflow(targets=targetsPE, wf_file="trim-pe.cwl", input_file="trim-pe.yml", dir_path=dir_path) # trim <- renderWF(trim, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_")) # trim # output(trim)[1:2] ## ----preprocessing, eval=FALSE------------------------------------------------ # preprocessReads(args=trim, Fct="trimLRPatterns(Rpattern='GCCCGGGTAA', # subject=fq)", batchsize=100000, overwrite=TRUE, compress=TRUE) ## ----custom_preprocessing, eval=FALSE----------------------------------------- # filterFct <- function(fq, cutoff=20, Nexceptions=0) { # qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm=TRUE) # # Retains reads where Phred scores are >= cutoff with N exceptions # fq[qcount <= Nexceptions] # } # preprocessReads(args=trim, Fct="filterFct(fq, cutoff=20, Nexceptions=0)", # batchsize=100000) ## ----trimGalore, eval=FALSE--------------------------------------------------- # targets <- system.file("extdata", "targets.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/trim_galore/trim_galore-se", package="systemPipeR") # trimG <- loadWorkflow(targets=targets, wf_file="trim_galore-se.cwl", input_file="trim_galore-se.yml", dir_path=dir_path) # trimG <- renderWF(trimG, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) # trimG # cmdlist(trimG)[1:2] # output(trimG)[1:2] # ## Run Single Machine Option # trimG <- runCommandline(trimG[1], make_bam = FALSE) ## ----trimmomatic, eval=FALSE-------------------------------------------------- # targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/trimmomatic/trimmomatic-pe", package="systemPipeR") # trimM <- loadWorkflow(targets=targetsPE, wf_file="trimmomatic-pe.cwl", input_file="trimmomatic-pe.yml", dir_path=dir_path) # trimM <- renderWF(trimM, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_")) # trimM # cmdlist(trimM)[1:2] # output(trimM)[1:2] # ## Run Single Machine Option # trimM <- runCommandline(trimM[1], make_bam = FALSE) ## ----fastq_quality, eval=FALSE------------------------------------------------ # fqlist <- seeFastq(fastq=infile1(trim), batchsize=10000, klength=8) # pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist)) # seeFastqPlot(fqlist) # dev.off() ## ----fastq_quality_parallel_single, eval=FALSE-------------------------------- # f <- function(x) seeFastq(fastq=infile1(trim)[x], batchsize=100000, klength=8) # fqlist <- bplapply(seq(along=trim), f, BPPARAM = MulticoreParam(workers=4)) # seeFastqPlot(unlist(fqlist, recursive=FALSE)) ## ----fastq_quality_parallel_cluster, eval=FALSE------------------------------- # library(BiocParallel); library(batchtools) # f <- function(x) { # library(systemPipeR) # targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/preprocessReads/trim-pe", package="systemPipeR") # trim <- loadWorkflow(targets=targetsPE, wf_file="trim-pe.cwl", input_file="trim-pe.yml", dir_path=dir_path) # trim <- renderWF(trim, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_")) # seeFastq(fastq=infile1(trim)[x], batchsize=100000, klength=8) # } # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources) # fqlist <- bplapply(seq(along=trim), f, BPPARAM = param) # seeFastqPlot(unlist(fqlist, recursive=FALSE)) ## ----hisat_SYSargs2_object, eval=TRUE----------------------------------------- targets <- system.file("extdata", "targets.txt", package="systemPipeR") dir_path <- system.file("extdata/cwl/hisat2/hisat2-se", package="systemPipeR") args <- loadWorkflow(targets=targets, wf_file="hisat2-mapping-se.cwl", input_file="hisat2-mapping-se.yml", dir_path=dir_path) args <- renderWF(args, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) args cmdlist(args)[1:2] output(args)[1:2] ## ----subset, eval=TRUE-------------------------------------------------------- subsetWF(args, slot="input", subset='FileName')[1:2] ## Subsetting the input files for this particular workflow subsetWF(args, slot="output", subset=1, index=1)[1:2] ## Subsetting the output files for one particular step in the workflow subsetWF(args, slot="step", subset=1)[1] ## Subsetting the command-lines for one particular step in the workflow subsetWF(args, slot="output", subset=1, index=1, delete=TRUE)[1] ## DELETING specific output files ## ----hisat_index, eval=FALSE-------------------------------------------------- # dir_path <- system.file("extdata/cwl/hisat2/hisat2-idx", package="systemPipeR") # idx <- loadWorkflow(targets=NULL, wf_file="hisat2-index.cwl", input_file="hisat2-index.yml", dir_path=dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # # ## Run # runCommandline(idx, make_bam = FALSE) ## ----runCommandline_align, eval=FALSE----------------------------------------- # runCommandline(args, make_bam = FALSE) ## generates alignments and writes *.sam files to ./results folder # args <- runCommandline(args, make_bam = TRUE) ## same as above but writes files and converts *.sam files to sorted and indexed BAM files. Assigning the new extention of the output files to the object args. ## ----clusterRun_args, eval=FALSE---------------------------------------------- # library(batchtools) # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # reg <- clusterRun(args, FUN = runCommandline, more.args = list(args=args, make_bam=TRUE, dir=FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs=18, runid="01", resourceList=resources) # getStatus(reg=reg) # waitForJobs(reg=reg) ## ----output, eval=FALSE------------------------------------------------------- # args <- output_update(args, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam")) ## Updates the output(args) to the right location in the subfolders # output(args) ## ----writeTargetsout, eval=FALSE---------------------------------------------- # names(clt(args)) # writeTargetsout(x=args, file="default", step = 1, # new_col = "FileName", new_col_output_index = 1, overwrite = TRUE) ## ----hisat_alignment, eval=FALSE---------------------------------------------- # targets <- system.file("extdata", "targets.txt", package="systemPipeR") # dir_path <- system.file("extdata/cwl/workflow-hisat2/workflow-hisat2-se", package="systemPipeR") # WF <- loadWorkflow(targets=targets, wf_file="workflow_hisat2-se.cwl", input_file="workflow_hisat2-se.yml", dir_path=dir_path) # WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) # WF # cmdlist(WF)[1:2] # output(WF)[1:2] ## ----bowtie_index, eval=FALSE------------------------------------------------- # dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-idx", package="systemPipeR") # idx <- loadWorkflow(targets=NULL, wf_file="bowtie2-index.cwl", input_file="bowtie2-index.yml", dir_path=dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # # ## Run in single machine # runCommandline(idx, make_bam = FALSE) ## ----tophat2-pe, eval=FALSE--------------------------------------------------- # targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR") # dir_path <- system.file("extdata/cwl/tophat2/tophat2-pe", package="systemPipeR") # tophat2PE <- loadWorkflow(targets = targetsPE, wf_file = "tophat2-mapping-pe.cwl", # input_file = "tophat2-mapping-pe.yml", dir_path = dir_path) # tophat2PE <- renderWF(tophat2PE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_")) # tophat2PE # cmdlist(tophat2PE)[1:2] # output(tophat2PE)[1:2] # # ## Run in single machine # tophat2PE <- runCommandline(tophat2PE[1], make_bam = TRUE) ## ----tophat2-pe_parallel, eval=FALSE------------------------------------------ # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # reg <- clusterRun(tophat2PE, FUN = runCommandline, more.args = list(args=tophat2PE, make_bam=TRUE, dir=FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs=18, runid="01", resourceList=resources) # waitForJobs(reg=reg) ## ----writeTargetsout_tophat2PE, eval=FALSE------------------------------------ # names(clt(tophat2PE)) # writeTargetsout(x=tophat2PE, file="default", step = 1, # new_col = "tophat2PE", new_col_output_index = 1, overwrite = TRUE) ## ----bowtie2_index, eval=FALSE------------------------------------------------ # dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-idx", package="systemPipeR") # idx <- loadWorkflow(targets=NULL, wf_file="bowtie2-index.cwl", input_file="bowtie2-index.yml", dir_path=dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # # ## Run in single machine # runCommandline(idx, make_bam = FALSE) ## ----bowtie2_SYSargs2, eval=FALSE--------------------------------------------- # targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR") # dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-pe", package="systemPipeR") # bowtiePE <- loadWorkflow(targets = targetsPE, wf_file = "bowtie2-mapping-pe.cwl", # input_file = "bowtie2-mapping-pe.yml", dir_path = dir_path) # bowtiePE <- renderWF(bowtiePE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_")) # bowtiePE # cmdlist(bowtiePE)[1:2] # output(bowtiePE)[1:2] ## ----bowtie2_cluster, eval=FALSE---------------------------------------------- # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # reg <- clusterRun(bowtiePE, FUN = runCommandline, more.args = list(args=bowtiePE, dir = FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs = 18, runid = "01", resourceList = resources) # getStatus(reg = reg) ## ----bowtie2_sm, eval=FALSE--------------------------------------------------- # bowtiePE <- runCommandline(bowtiePE) ## ----writeTargetsout_bowtiePE, eval=FALSE------------------------------------- # names(clt(bowtiePE)) # writeTargetsout(x=bowtiePE, file="default", step = 1, # new_col = "bowtiePE", new_col_output_index = 1, overwrite = TRUE) ## ----bwa_index, eval=FALSE---------------------------------------------------- # dir_path <- system.file("extdata/cwl/bwa/bwa-idx", package="systemPipeR") # idx <- loadWorkflow(targets=NULL, wf_file="bwa-index.cwl", input_file="bwa-index.yml", dir_path=dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # Indexes reference genome # # ## Run # runCommandline(idx, make_bam = FALSE) ## ----bwa-pe_alignment, eval=FALSE--------------------------------------------- # targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR") # dir_path <- system.file("extdata/cwl/bwa/bwa-pe", package="systemPipeR") # bwaPE <- loadWorkflow(targets = targetsPE, wf_file = "bwa-pe.cwl", # input_file = "bwa-pe.yml", dir_path = dir_path) # bwaPE <- renderWF(bwaPE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_")) # bwaPE # cmdlist(bwaPE)[1:2] # output(bwaPE)[1:2] # ## Single Machine # bwaPE <- runCommandline(args= bwaPE, make_bam=FALSE) # # ## Cluster # library(batchtools) # resources <- list(walltime = 120, ntasks = 1, ncpus = 4, memory = 1024) # reg <- clusterRun(bwaPE, FUN = runCommandline, more.args = list(args=bwaPE, dir = FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs = 18, runid = "01", resourceList = resources) # getStatus(reg = reg) ## ----writeTargetsout_bwaPE, eval=FALSE---------------------------------------- # names(clt(bwaPE)) # writeTargetsout(x=bwaPE, file="default", step = 1, # new_col = "bwaPE", new_col_output_index = 1, overwrite = TRUE) ## ----rsubread, eval=FALSE----------------------------------------------------- # ## Build the index: # dir_path <- system.file("extdata/cwl/rsubread/rsubread-idx", package="systemPipeR") # idx <- loadWorkflow(targets = NULL, wf_file = "rsubread-index.cwl", # input_file = "rsubread-index.yml", dir_path = dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # runCommandline(args= idx, make_bam = FALSE) # # ## Running the alignment: # targets <- system.file("extdata", "targets.txt", package = "systemPipeR") # dir_path <- system.file("extdata/cwl/rsubread/rsubread-se", package="systemPipeR") # rsubread <- loadWorkflow(targets = targets, wf_file = "rsubread-mapping-se.cwl", # input_file = "rsubread-mapping-se.yml", dir_path = dir_path) # rsubread <- renderWF(rsubread, inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_")) # rsubread # cmdlist(rsubread)[1] # # ## Single Machine # rsubread <- runCommandline(args=rsubread[1]) ## ----writeTargetsout_rsubread, eval=FALSE------------------------------------- # names(clt(rsubread)) # writeTargetsout(x=rsubread, file="default", step = 1, # new_col = "rsubread", new_col_output_index = 1, overwrite = TRUE) ## ----gsnap, eval=FALSE-------------------------------------------------------- # ## Build the index: # dir_path <- system.file("extdata/cwl/gsnap/gsnap-idx", package="systemPipeR") # idx <- loadWorkflow(targets = NULL, wf_file = "gsnap-index.cwl", # input_file = "gsnap-index.yml", dir_path = dir_path) # idx <- renderWF(idx) # idx # cmdlist(idx) # runCommandline(args= idx, make_bam = FALSE) # # ## Running the alignment: # targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR") # dir_path <- system.file("extdata/cwl/gsnap/gsnap-pe", package="systemPipeR") # gsnap <- loadWorkflow(targets = targetsPE, wf_file = "gsnap-mapping-pe.cwl", # input_file = "gsnap-mapping-pe.yml", dir_path = dir_path) # gsnap <- renderWF(gsnap, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_", SampleName = "_SampleName_")) # gsnap # cmdlist(gsnap)[1] # output(gsnap)[1] # # ## Cluster # library(batchtools) # resources <- list(walltime = 120, ntasks = 1, ncpus = 4, memory = 1024) # reg <- clusterRun(gsnap, FUN = runCommandline, more.args = list(args=gsnap, make_bam=FALSE), # conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl", # Njobs = 18, runid = "01", resourceList = resources) # getStatus(reg = reg) # gsnap <- output_update(gsnap, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam")) ## ----writeTargetsout_gsnap, eval=FALSE---------------------------------------- # names(clt(gsnap)) # writeTargetsout(x=gsnap, file="default", step = 1, # new_col = "gsnap", new_col_output_index = 1, overwrite = TRUE) ## ----igv, eval=FALSE---------------------------------------------------------- # symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"), # urlbase="http://myserver.edu/~username/", # urlfile="IGVurl.txt") ## ----create_txdb, eval=FALSE-------------------------------------------------- # library(GenomicFeatures) # txdb <- makeTxDbFromGFF(file="data/tair10.gff", format="gff", dataSource="TAIR", organism="Arabidopsis thaliana") # saveDb(txdb, file="./data/tair10.sqlite") ## ----read_counting_multicore, eval=FALSE-------------------------------------- # library(BiocParallel) # txdb <- loadDb("./data/tair10.sqlite") # eByg <- exonsBy(txdb, by="gene") # outpaths <- subsetWF(args, slot="output", subset=1, index=1) # bfl <- BamFileList(outpaths, yieldSize=50000, index=character()) # multicoreParam <- MulticoreParam(workers=4); register(multicoreParam); registered() # counteByg <- bplapply(bfl, function(x) summarizeOverlaps(eByg, x, mode="Union", ignore.strand=TRUE, inter.feature=TRUE, singleEnd=TRUE)) # # # Note: for strand-specific RNA-Seq set 'ignore.strand=FALSE' and for PE data set 'singleEnd=FALSE' # countDFeByg <- sapply(seq(along=counteByg), # function(x) assays(counteByg[[x]])$counts) # rownames(countDFeByg) <- names(rowRanges(counteByg[[1]])); colnames(countDFeByg) <- names(bfl) # rpkmDFeByg <- apply(countDFeByg, 2, function(x) returnRPKM(counts=x, ranges=eByg)) # write.table(countDFeByg, "results/countDFeByg.xls", col.names=NA, quote=FALSE, sep="\t") # write.table(rpkmDFeByg, "results/rpkmDFeByg.xls", col.names=NA, quote=FALSE, sep="\t") ## ----read_counting_multinode, eval=FALSE-------------------------------------- # library(BiocParallel) # f <- function(x) { # library(systemPipeR); library(BiocParallel); library(GenomicFeatures) # txdb <- loadDb("./data/tair10.sqlite") # eByg <- exonsBy(txdb, by="gene") # args <- systemArgs(sysma="param/tophat.param", mytargets="targets.txt") # outpaths <- subsetWF(args, slot="output", subset=1, index=1) # bfl <- BamFileList(outpaths, yieldSize=50000, index=character()) # summarizeOverlaps(eByg, bfl[x], mode="Union", ignore.strand=TRUE, inter.feature=TRUE, singleEnd=TRUE) # } # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources) # counteByg <- bplapply(seq(along=args), f, BPPARAM = param) # countDFeByg <- sapply(seq(along=counteByg), # function(x) assays(counteByg[[x]])$counts) # rownames(countDFeByg) <- names(rowRanges(counteByg[[1]])); colnames(countDFeByg) <- names(outpaths) ## ----process_monitoring, eval=FALSE------------------------------------------- # getStatus(reg=reg) # outpaths <- subsetWF(args, slot="output", subset=1, index=1) # file.exists(outpaths) # sapply(1:length(outpaths), function(x) loadResult(reg, id=x)) # Works after job completion ## ----align_stats1, eval=FALSE------------------------------------------------- # read_statsDF <- alignStats(args) # write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t") ## ----align_stats2, eval=TRUE-------------------------------------------------- read.table(system.file("extdata", "alignStats.xls", package="systemPipeR"), header=TRUE)[1:4,] ## ----align_stats_parallel, eval=FALSE----------------------------------------- # f <- function(x) alignStats(args[x]) # read_statsList <- bplapply(seq(along=args), f, # BPPARAM = MulticoreParam(workers=8)) # read_statsDF <- do.call("rbind", read_statsList) ## ----align_stats_parallel_cluster, eval=FALSE--------------------------------- # library(BiocParallel); library(batchtools) # f <- function(x) { # library(systemPipeR) # targets <- system.file("extdata", "targets.txt", package="systemPipeR") # dir_path <- "param/cwl/hisat2/hisat2-se" ## TODO: replace path to system.file # args <- loadWorkflow(targets=targets, wf_file="hisat2-mapping-se.cwl", input_file="hisat2-mapping-se.yml", dir_path=dir_path) # args <- renderWF(args, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_")) # args <- output_update(args, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam")) # alignStats(args[x]) # } # resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024) # param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources) # read_statsList <- bplapply(seq(along=args), f, BPPARAM = param) # read_statsDF <- do.call("rbind", read_statsList) ## ----read_counting_mirna, eval=FALSE------------------------------------------ # system("wget ftp://mirbase.org/pub/mirbase/19/genomes/My_species.gff3 -P ./data/") # gff <- import.gff("./data/My_species.gff3") # gff <- split(gff, elementMetadata(gff)$ID) # bams <- names(bampaths); names(bams) <- targets$SampleName # bfl <- BamFileList(bams, yieldSize=50000, index=character()) # countDFmiR <- summarizeOverlaps(gff, bfl, mode="Union", ignore.strand=FALSE, inter.feature=FALSE) # Note: inter.feature=FALSE important since pre and mature miRNA ranges overlap # rpkmDFmiR <- apply(countDFmiR, 2, # function(x) returnRPKM(counts=x, gffsub=gff)) # write.table(assays(countDFmiR)$counts, "results/countDFmiR.xls", col.names=NA, quote=FALSE, sep="\t") # write.table(rpkmDFmiR, "results/rpkmDFmiR.xls", col.names=NA, quote=FALSE, sep="\t") ## ----sample_tree_rlog, eval=TRUE---------------------------------------------- library(DESeq2, warn.conflicts=FALSE, quietly=TRUE); library(ape, warn.conflicts=FALSE) countDFpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR") countDF <- as.matrix(read.table(countDFpath)) colData <- data.frame(row.names=targets.as.df(targets(args))$SampleName, condition=targets.as.df(targets(args))$Factor) dds <- DESeqDataSetFromMatrix(countData = countDF, colData = colData, design = ~ condition) d <- cor(assay(rlog(dds)), method="spearman") hc <- hclust(dist(1-d)) plot.phylo(as.phylo(hc), type="p", edge.col=4, edge.width=3, show.node.label=TRUE, no.margin=TRUE) ## ----sample_tree_rpkm, eval=FALSE--------------------------------------------- # rpkmDFeBygpath <- system.file("extdata", "rpkmDFeByg.xls", package="systemPipeR") # rpkmDFeByg <- read.table(rpkmDFeBygpath, check.names=FALSE) # rpkmDFeByg <- rpkmDFeByg[rowMeans(rpkmDFeByg) > 50,] # d <- cor(rpkmDFeByg, method="spearman") # hc <- hclust(as.dist(1-d)) # plot.phylo(as.phylo(hc), type="p", edge.col="blue", edge.width=2, show.node.label=TRUE, no.margin=TRUE) ## ----edger_wrapper, eval=TRUE------------------------------------------------- targets <- read.delim(targetspath, comment="#") cmp <- readComp(file=targetspath, format="matrix", delim="-") cmp[[1]] countDFeBygpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR") countDFeByg <- read.delim(countDFeBygpath, row.names=1) edgeDF <- run_edgeR(countDF=countDFeByg, targets=targets, cmp=cmp[[1]], independent=FALSE, mdsplot="") ## ----edger_deg_counts, eval=TRUE---------------------------------------------- DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=10)) ## ----edger_deg_stats, eval=TRUE----------------------------------------------- names(DEG_list) DEG_list$Summary[1:4,] ## ----deseq2_wrapper, eval=TRUE------------------------------------------------ degseqDF <- run_DESeq2(countDF=countDFeByg, targets=targets, cmp=cmp[[1]], independent=FALSE) ## ----deseq2_deg_counts, eval=TRUE--------------------------------------------- DEG_list2 <- filterDEGs(degDF=degseqDF, filter=c(Fold=2, FDR=10)) ## ----vennplot, eval=TRUE------------------------------------------------------ vennsetup <- overLapper(DEG_list$Up[6:9], type="vennsets") vennsetdown <- overLapper(DEG_list$Down[6:9], type="vennsets") vennPlot(list(vennsetup, vennsetdown), mymain="", mysub="", colmode=2, ccol=c("blue", "red")) ## ----get_go_biomart, eval=FALSE----------------------------------------------- # library("biomaRt") # listMarts() # To choose BioMart database # listMarts(host="plants.ensembl.org") # m <- useMart("plants_mart", host="plants.ensembl.org") # listDatasets(m) # m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="plants.ensembl.org") # listAttributes(m) # Choose data types you want to download # go <- getBM(attributes=c("go_id", "tair_locus", "namespace_1003"), mart=m) # go <- go[go[,3]!="",]; go[,3] <- as.character(go[,3]) # go[go[,3]=="molecular_function", 3] <- "F"; go[go[,3]=="biological_process", 3] <- "P"; go[go[,3]=="cellular_component", 3] <- "C" # go[1:4,] # dir.create("./data/GO") # write.table(go, "data/GO/GOannotationsBiomart_mod.txt", quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t") # catdb <- makeCATdb(myfile="data/GO/GOannotationsBiomart_mod.txt", lib=NULL, org="", colno=c(1,2,3), idconv=NULL) # save(catdb, file="data/GO/catdb.RData") ## ----go_enrichment, eval=FALSE------------------------------------------------ # load("data/GO/catdb.RData") # DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=50), plot=FALSE) # up_down <- DEG_list$UporDown; names(up_down) <- paste(names(up_down), "_up_down", sep="") # up <- DEG_list$Up; names(up) <- paste(names(up), "_up", sep="") # down <- DEG_list$Down; names(down) <- paste(names(down), "_down", sep="") # DEGlist <- c(up_down, up, down) # DEGlist <- DEGlist[sapply(DEGlist, length) > 0] # BatchResult <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="all", id_type="gene", CLSZ=2, cutoff=0.9, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL) # library("biomaRt") # m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="plants.ensembl.org") # goslimvec <- as.character(getBM(attributes=c("goslim_goa_accession"), mart=m)[,1]) # BatchResultslim <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="slim", id_type="gene", myslimv=goslimvec, CLSZ=10, cutoff=0.01, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL) ## ----plot_go_enrichment, eval=FALSE------------------------------------------- # gos <- BatchResultslim[grep("M6-V6_up_down", BatchResultslim$CLID), ] # gos <- BatchResultslim # pdf("GOslimbarplotMF.pdf", height=8, width=10); goBarplot(gos, gocat="MF"); dev.off() # goBarplot(gos, gocat="BP") # goBarplot(gos, gocat="CC") ## ----hierarchical_clustering, eval=FALSE-------------------------------------- # library(pheatmap) # geneids <- unique(as.character(unlist(DEG_list[[1]]))) # y <- assay(rlog(dds))[geneids, ] # pdf("heatmap1.pdf") # pheatmap(y, scale="row", clustering_distance_rows="correlation", clustering_distance_cols="correlation") # dev.off() ## ----genRna_workflow_single, eval=FALSE--------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="rnaseq") # setwd("rnaseq") ## ----genChip_workflow_single, eval=FALSE-------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="chipseq") # setwd("chipseq") ## ----genVar_workflow_single, eval=FALSE--------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="varseq") # setwd("varseq") ## ----genRibo_workflow_single, eval=FALSE-------------------------------------- # library(systemPipeRdata) # genWorkenvir(workflow="riboseq") # setwd("riboseq") ## ----sessionInfo-------------------------------------------------------------- sessionInfo()