## ----global_options, include=FALSE--------------------------------------- knitr::opts_chunk$set(warning = FALSE, message = FALSE, eval = FALSE) ## ----load_libraries, results='hide'-------------------------------------- library(RCAS) ## ----sample_data--------------------------------------------------------- library(RCAS) data(queryRegions) #sample queryRegions in BED format() data(gff) #sample GFF file ## ----RCAS_import_data---------------------------------------------------- #library(RCAS) #queryRegions <- importBed(filePath = , sampleN = 10000) #gff <- importGtf(filePath = ) ## ----queryGFF------------------------------------------------------------ overlaps <- queryGff(queryRegions = queryRegions, gffData = gff) #data.table is used to do quick summary operations overlaps.dt <- data.table(as.data.frame(overlaps)) ## ----query_gene_types---------------------------------------------------- biotype_col <- grep('gene_biotype', colnames(overlaps.dt), value = T) df <- overlaps.dt[,length(unique(overlappingQuery)), by = biotype_col] colnames(df) <- c("feature", "count") df$percent <- round(df$count / length(queryRegions) * 100, 1) df <- df[order(count, decreasing = TRUE)] p <- plot_ly(data = df, type = "bar", x = df$feature, y = df$percent, text = paste("count:", df$count), color=df$feature) layout(p = p, margin = list(l=100, r=100, b=150), xaxis = list(showticklabels = TRUE, tickangle = 90), yaxis = list(title = paste("percentage of query regions,", "n =", length(queryRegions)))) ## ----getTxdbFeatures----------------------------------------------------- txdbFeatures <- getTxdbFeaturesFromGRanges(gff) ## ----summarizeQueryRegions----------------------------------------------- summary <- summarizeQueryRegions(queryRegions = queryRegions, txdbFeatures = txdbFeatures) df <- data.frame(summary) df$percent <- round((df$count / length(queryRegions)), 3) * 100 p <- plot_ly( data = df, x = rownames(df), y = df$percent, type = 'bar', text = paste("count:", df$count), color = rownames(df) ) layout(p = p, xaxis = list(title = 'features'), yaxis = list(title = paste("percentage of query regions,", "n =", length(queryRegions) ) ), margin = list(b = 150, r = 50) ) ## ----getTargetedGenesTable----------------------------------------------- dt <- getTargetedGenesTable(queryRegions = queryRegions, txdbFeatures = txdbFeatures) dt <- dt[order(transcripts, decreasing = TRUE)] DT::datatable(dt[1:100], extensions = c('Buttons', 'FixedColumns'), options = list(fixedColumns = TRUE, scrollX = TRUE, dom = 'Bfrtip', buttons = c('copy', 'print', 'csv','excel', 'pdf')), filter = 'bottom' ) ## ----coverageprofile----------------------------------------------------- cov <- calculateCoverageProfile(queryRegions = queryRegions, targetRegions = txdbFeatures$threeUTRs, sampleN = 10000) plot_ly(data = cov, x = ~bins, y = ~coverage, type = 'scatter', mode = 'lines') ## ----coverageprofilelist------------------------------------------------- covList <- calculateCoverageProfileList(queryRegions = queryRegions, targetRegionsList = txdbFeatures, sampleN = 10000) df <- do.call('cbind', covList) df <- df[,!grepl(colnames(df), pattern = '*.bins')] df$bins <- c(1:100) colnames(df) <- gsub(pattern = ".coverage", replacement = "", x = colnames(df)) mdf <- reshape2::melt(df, id.vars = c('bins')) colnames(mdf) <- c('bins', 'feature', 'coverage') p = plot_ly(data = mdf, x = ~bins, y = ~coverage, type = 'scatter', mode = 'lines', color = ~feature) layout(p) ## ----transcriptBoundaryCoverage------------------------------------------ cvg <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, sampleN = 10000) yLimit <- (as.integer(max(c(cvg$fivePrime, cvg$threePrime))/10)+1)*10 p <- subplot( plot_ly(data = cvg, x = ~bases, y = ~fivePrime, type = 'scatter', mode = 'lines'), plot_ly(data = cvg, x = ~bases, y = ~threePrime, type = 'scatter', mode = 'lines'), margin = 0.05 ) %>% layout (xaxis = list(title = 'Distance (bp) to TSS'), xaxis2 = list(title = 'Distance (bp) to TES'), yaxis = list(title = 'coverage', range = c(0, yLimit)), yaxis2 = list(title = 'coverage', range = c(0, yLimit)), showlegend = FALSE) layout(p) ## ----motif_analysis------------------------------------------------------ motifResults <- runMotifRG(queryRegions = queryRegions, genomeVersion = 'hg19', motifN = 2, nCores = 2) par(mfrow = c(1,2), mar = c(2,2,2,2)) for (i in 1:length(motifResults$motifs)) { motifPattern <- motifResults$motifs[[i]]@pattern motifRG::plotMotif(match = motifResults$motifs[[i]]@match$pattern, main = paste0('Motif-',i,': ',motifPattern), entropy = TRUE) } ## ----motif_analysis_table------------------------------------------------ summary <- getMotifSummaryTable(motifResults) DT::datatable(summary, extensions = c('Buttons', 'FixedColumns'), options = list(fixedColumns = TRUE, scrollX = TRUE, dom = 'Bfrtip', buttons = c('copy', 'print', 'csv','excel', 'pdf')), filter = 'bottom' ) ## ----GO analysis--------------------------------------------------------- #get all genes from the GTF data backgroundGenes <- unique(gff$gene_id) #get genes that overlap query regions targetedGenes <- unique(overlaps$gene_id) #run TopGO goBP <- runTopGO(ontology = 'BP', species = 'human', backgroundGenes = backgroundGenes, targetedGenes = targetedGenes) goBP <- goBP[order(goBP$foldEnrichment, decreasing = TRUE),] rownames(goBP) <- goBP$GO.ID goBP <- subset(goBP, select = -c(Annotated,classicFisher, bh, GO.ID)) DT::datatable(goBP[1:10,], extensions = c('Buttons', 'FixedColumns'), options = list(fixedColumns = TRUE, scrollX = TRUE, dom = 'Bfrtip', buttons = c('copy', 'print', 'csv', 'excel', 'pdf')), filter = 'bottom' ) ## ----msigdb_analysis----------------------------------------------------- #geneSets <- parseMsigdb(< path to msigdbFile>) data(geneSets) resultsGSEA <- runGSEA(geneSetList = geneSets, backgroundGenes = backgroundGenes, targetedGenes = targetedGenes) datatable(resultsGSEA[1:10,], extensions = 'FixedColumns', options = list( dom = 't', scrollX = TRUE, scrollCollapse = TRUE ))