## ----results='hide', message=FALSE--------------------------------------- suppressPackageStartupMessages(require(maftools)) #read TCGA maf file for LAML laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') laml = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE) ## ------------------------------------------------------------------------ #Typing laml shows basic summary of MAF file. laml #Shows sample summry. getSampleSummary(laml) #Shows frequently mutated genes. getGeneSummary(laml) #Shows all fields in MAF getFields(laml) #Writes maf summary to an output file with basename laml. write.mafSummary(maf = laml, basename = 'laml') ## ----fig.height=6, fig.width=8------------------------------------------- plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE) ## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'---- #We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization) oncoplot(maf = laml, top = 10, removeNonMutated = TRUE) ## ----results='hide', message=FALSE--------------------------------------- #read TCGA maf file for LAML laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools") amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools") del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools") laml.plus.gistic = read.maf(maf = laml.maf, removeSilent = TRUE, useAll = FALSE, gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE) ## ---- fig.align='left',fig.height=6,fig.width=10, eval=T, fig.align='left'---- #We will draw oncoplots for top ten mutated genes. (Removing non-mutated samples from the plot for better visualization) oncoplot(maf = laml.plus.gistic, top = 10, removeNonMutated = TRUE, sortByMutation = TRUE) ## ---- fig.height=7,fig.width=10, eval=T, fig.align='left'---------------- #Read FAB classification of TCGA LAML barcodes. laml.fab.anno = system.file('extdata', 'tcga_laml_fab_annotation.txt', package = 'maftools') laml.fab.anno = read.delim(laml.fab.anno, sep = '\t') head(laml.fab.anno) #Changing colors (You can use any colors, here in this example we will use a color palette from RColorBrewer) col = RColorBrewer::brewer.pal(n = 8, name = 'Paired') names(col) = c('Frame_Shift_Del','Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins', 'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del') #We will plot same top ten mutated genes with FAB classification as annotation and using above defined colors. oncoplot(maf = laml, top = 10, annotation = laml.fab.anno, removeNonMutated = TRUE, colors = col) ## ---- fig.height=2,fig.width=8,fig.align='center'------------------------ oncostrip(maf = laml, genes = c('DNMT3A','NPM1', 'RUNX1'), removeNonMutated = TRUE, showTumorSampleBarcodes = FALSE) ## ---- fig.align='default', fig.height=6, fig.width=8, eval = T----------- laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE) #plot titv summary plotTiTv(res = laml.titv) ## ----fig.height=4,fig.width=8,fig.align='center'------------------------- #Lets plot lollipop plot for DNMT3A, which is one of the most frequent mutated gene in Leukemia. dnmt3a.lpop = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', showMutationRate = TRUE, domainLabelSize = 3, defaultYaxis = FALSE) ## ----fig.height=4,fig.width=8,fig.align='center'------------------------- #Lets plot mutations on KIT gene, without repel option. kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', domainLabelSize = 3) #Same plot with repel=TRUE kit.lpop = lollipopPlot(maf = laml, gene = 'KIT', AACol = 'Protein_Change', labelPos = c(416, 418), refSeqID = 'NM_000222', repel = TRUE, domainLabelSize = 3) ## ---- warning=FALSE, message=FALSE, fig.height=4,fig.width=8,fig.align='center'---- laml.dnmt3a = lollipopPlot(maf = laml, gene = 'DNMT3A', AACol = 'Protein_Change', refSeqID = 'NM_175629', labelPos = 882, collapsePosLabel = TRUE, cBioPortal = TRUE, domainLabelSize = 3, defaultYaxis = FALSE) ## ---- fig.height=4,fig.width=8,fig.align='center'------------------------ tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools") plotCBSsegments(cbsFile = tcga.ab.009.seg, maf = laml, labelAll = TRUE) ## ---- results='hide', message=FALSE-------------------------------------- coad <- system.file("extdata", "coad.maf.gz", package = "maftools") coad = read.maf(maf = coad) ## ---- fig.height=5,fig.width=12,fig.align='center'----------------------- coad.rf = rainfallPlot(maf = coad, detectChangePoints = TRUE, fontSize = 12, pointSize = 0.6) ## ---- fig.align='center', fig.height=6, fig.width=10--------------------- laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML') ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- geneCloud(input = laml, minMut = 3) ## ------------------------------------------------------------------------ vafPlot = plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU', flip = TRUE) ## ------------------------------------------------------------------------ all.lesions <- system.file("extdata", "all_lesions.conf_99.txt", package = "maftools") amp.genes <- system.file("extdata", "amp_genes.conf_99.txt", package = "maftools") del.genes <- system.file("extdata", "del_genes.conf_99.txt", package = "maftools") laml.gistic = readGistic(gisticAllLesionsFile = all.lesions, gisticAmpGenesFile = amp.genes, gisticDelGenesFile = del.genes, isTCGA = TRUE) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- gisticPlot(gistic = laml.gistic) ## ---- fig.align='left',fig.width=7, fig.height=5, eval=T----------------- gr = plotGisticResults(gistic = laml.gistic) ## ------------------------------------------------------------------------ #We will run mutExclusive on top 10 mutated genes. laml.mut.excl = mutExclusive(maf = laml, top = 10) head(laml.mut.excl) ## ---- fig.height=1.5,fig.width=8,fig.align='center'---------------------- oncostrip(maf = laml, genes = c('NPM1', 'RUNX1'), sort = TRUE, removeNonMutated = TRUE) ## ---- fig.align='default', fig.width=7,fig.height=5, message=F,results='hide', eval=T---- laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore') head(laml.sig) ## ---- fig.align='default', fig.width=7,fig.height=5, eval= T------------- plotOncodrive(res = laml.sig, fdrCutOff = 0.1, useFraction = TRUE) ## ---- fig.align='left',fig.width=7, fig.height=5------------------------- laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10) #Protein summary (Printing first 7 columns for display convenience) laml.pfam$proteinSummary[,1:7, with = FALSE] #Domain summary (Printing first 3 columns for display convenience) laml.pfam$domainSummary[,1:3, with = FALSE] ## ------------------------------------------------------------------------ #MutsigCV results for TCGA-AML laml.mutsig <- system.file("extdata", "LAML_sig_genes.txt.gz", package = "maftools") laml.pancan = pancanComparision(mutsigResults = laml.mutsig, qval = 0.1, cohortName = 'LAML', inputSampleSize = 200, label = 1, normSampleSize = TRUE) ## ------------------------------------------------------------------------ laml.surv <- system.file("extdata", "laml_survival.tsv", package = "maftools") laml.surv = read.delim(file = laml.surv, header = TRUE, stringsAsFactors = FALSE) head(laml.surv) #Survival analysis based on grouping of DNMT3A mutation status laml.dnmt3a.surv = mafSurvival(maf = laml, clinicalData = laml.surv, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', showConfInt = TRUE) ## ----results='hide', message=FALSE--------------------------------------- #Primary APL MAF primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools") primary.apl = read.maf(maf = primary.apl) #Relapse APL MAF relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools") relapse.apl = read.maf(maf = relapse.apl) ## ------------------------------------------------------------------------ #We will consider only genes which are mutated in at-least in 5 samples in one of the cohort, to avoid bias due to single mutated genes. pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', minMut = 5) print(pt.vs.rt) ## ---- fig.width=6, fig.height=5, fig.align='center'---------------------- apl.pt.vs.rt.fp = forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.05, show = 'stat', color = c('royalblue', 'maroon')) ## ---- fig.height=3,fig.width=11, eval=T, fig.align='left'---------------- genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3") coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE) ## ---- echo = TRUE, fig.align='center', fig.height=5, fig.width=7, eval=T---- #We will run this for sample TCGA.AB.2972 tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.2972', vafCol = 'i_TumorVAF_WU') print(tcga.ab.2972.het$clusterMeans) #Visualizing results plotClusters(clusters = tcga.ab.2972.het) ## ---- fig.align='center', fig.height=5, fig.width=7, eval=T-------------- seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools') tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA.AB.3009', segFile = seg, vafCol = 'i_TumorVAF_WU') #Visualizing results. Highlighting those variants on copynumber altered variants. plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE) ## ---- results='hide', message=F, fig.align='center',fig.width=7, fig.height=6, eval = T---- #we will specify for random 4 patients. laml.math = math.score(maf = laml, vafCol = 'i_TumorVAF_WU', sampleName = c('TCGA.AB.3009', 'TCGA.AB.2849', 'TCGA.AB.3002', 'TCGA.AB.2972')) ## ---- eval=T------------------------------------------------------------- print(laml.math) ## ---- eval=FALSE--------------------------------------------------------- # #First we extract adjacent bases to the mutated locus and clssify them into 96 substitution classes. This also estimates APOBEC enrichment per sample. # laml.tnm = trinucleotideMatrix(maf = laml, ref_genome = '/path/to/hg19.fa', # prefix = 'chr', add = TRUE, ignoreChr = 'chr23', useSyn = TRUE) # # reading /path/to/hg19.fa (this might take few minutes).. # # Extracting 5' and 3' adjacent bases.. # # Extracting +/- 20bp around mutated bases for background estimation.. # # Estimating APOBEC enrichment scores.. # # Performing one-way Fisher's test for APOBEC enrichment.. # # APOBEC related mutations are enriched in 2.674% of samples (APOBEC enrichment score > 2 ; 5 of 187 samples) # # Creating mutation matrix.. # # matrix of dimension 193x96 ## ---- eval=FALSE--------------------------------------------------------- # plotSignatures(laml.tnm) ## ---- echo=FALSE--------------------------------------------------------- sub.tbl = structure(list(n_mutations = c(3, 8, 4, 6, 6, 10, 4, 11, 7, 11, 12, 8, 10, 11, 10, 9, 10, 9, 18, 11, 20, 8, 10, 10, 15, 9, 12, 13, 17, 9, 15, 14, 10, 13, 9, 14, 10, 11, 13, 14, 19, 18, 14, 13, 16, 15, 16, 15, 16, 22, 17, 19, 14, 18, 14, 20, 14, 25, 19, 23, 17, 26, 14, 5, 14, 13, 9, 4, 8, 7, 10, 10, 14, 3, 1, 4, 12, 10, 9, 2, 10, 8, 1, 2, 2, 20, 4, 2, 11, 3, 11, 24, 3, 6, 8, 11, 2, 14, 12, 10, 10, 17, 15, 1, 13, 10, 12, 10, 1, 15, 2, 1, 9, 4, 11, 6, 4, 7, 1, 5, 15, 1, 17, 17, 16, 2, 13, 16, 11, 14, 1, 12, 20, 9, 14, 9, 5, 10, 5, 1, 8, 9, 8, 14, 2, 11, 2, 1, 13, 1, 4, 2, 7, 15, 7, 6, 9, 21, 4, 7, 2, 14, 6, 5, 4, 2, 14, 10, 5, 9, 6, 13, 9, 8, 8, 7, 6, 8, 11, 3, 10, 26, 17, 17, 35, 5, 1), APOBEC_Enriched = c("yes", "yes", "yes", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", NA, NA, "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", NA, "no", NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", NA, "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", "no"), fraction_APOBEC_mutations = c(0.333, 0.125, 0.25, 0.167, 0.167, 0.2, 0.25, 0.182, 0.143, 0.182, 0.167, 0.125, 0.2, 0.091, 0.1, 0.111, 0.1, 0.111, 0.111, 0.091, 0.2, 0.125, 0.1, 0.1, 0.067, 0.111, 0.083, 0.077, 0.118, 0.111, 0.067, 0.071, 0.1, 0.077, 0.111, 0.071, 0.1, 0.091, 0.077, 0.071, 0.105, 0.056, 0.071, 0.077, 0.125, 0.067, 0.062, 0.067, 0.062, 0.091, 0.118, 0.105, 0.071, 0.056, 0.071, 0.1, 0.071, 0.04, 0.053, 0.043, 0.059, 0.038, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("n_mutations", "APOBEC_Enriched", "fraction_APOBEC_mutations" ), row.names = c(NA, -187L), class = c("data.table", "data.frame" )) data.table::setDT(sub.tbl) sub.tbl$APOBEC_Enriched = factor(sub.tbl$APOBEC_Enriched, levels = c('yes', 'no')) #Set levels yp = pretty(x = c(1: max(sub.tbl[,n_mutations], na.rm = TRUE))) yp[length(yp)] = max(sub.tbl[,n_mutations], na.rm = TRUE) yp[1] = 1 pieDat = sub.tbl[!is.na(APOBEC_Enriched), mean(fraction_APOBEC_mutations), APOBEC_Enriched] pieDat[,nonApobec := 1 - V1] colnames(pieDat)[2] = 'Apobec' pieDat = data.table::melt(pieDat, id.vars = 'APOBEC_Enriched', drop = FALSE) pieDat[,title := paste0(variable, ' [', round(value, digits = 3), ']')] pieDat$title = gsub(pattern = '^Apobec', replacement = 'tCw', x = pieDat$title) pieDat$title = gsub(pattern = '^nonApobec', replacement = 'non-tCw', x = pieDat$title) layout(matrix(c(1,2,1,3), 2, 2, byrow = TRUE), widths=c(2, 3)) par(bty="n", mgp = c(0.5,0.5,0), las=1, tcl=-.25, font.main=4, xpd=NA) pieCol = c("#084594", "#9ECAE1") boxplot(n_mutations ~ APOBEC_Enriched, data = sub.tbl, xaxt="n", boxwex=0.6, outline = TRUE, lty=1, outwex=0, staplewex=0, frame.plot = FALSE, col = c('maroon', 'royalblue'), yaxt = 'n', ylim = c(min(yp), max(yp)), outcol="gray70", outcex = 0.8, outpch = 16) title(main = 'Mutation load between APOBEC enriched \n and non-APOBEC enriched samples', cex.main=0.9) axis(side = 1, at = c(1, 2), labels = na.omit(sub.tbl[,.N,APOBEC_Enriched])[,paste0('N=', N)], las = 1, tick = FALSE) axis(side = 2, at = yp, lwd = 1.8, las = 1) pie(x = pieDat[APOBEC_Enriched %in% 'yes', value], col = pieCol, border="white", radius = 0.95, cex.main=0.6, labels = pieDat[APOBEC_Enriched %in% 'yes', title], clockwise = TRUE) symbols(0,0,circles=.4, inches=FALSE, col="white", bg="white", lty=0, add=TRUE) title(main = 'Average tCw mutations in \n APOBEC enriched samples', cex.main=0.9) pie(x = pieDat[APOBEC_Enriched %in% 'no', value], col = pieCol, border="white", radius = 0.95, cex.main=1.33, labels = pieDat[APOBEC_Enriched %in% 'no', title], clockwise = TRUE) symbols(0,0,circles=.4, inches=FALSE, col="white", bg="white", lty=0, add=TRUE) title(main = 'Average tCw mutations in \n non-APOBEC enriched samples', cex.main = 0.9) ## ---- fig.height=5, fig.width=5, eval=F, message=FALSE------------------- # #Run main function with maximum 6 signatures. # require('NMF') # laml.sign = extractSignatures(mat = laml.tnm, nTry = 6, plotBestFitRes = FALSE) # # Warning : Found zero mutations for conversions A[T>G]C # # Comparing against experimentally validated 30 signatures.. (See http://cancer.sanger.ac.uk/cosmic/signatures for details.) # # Found Signature_1 most similar to validated Signature_1. CoSine-Similarity: 0.778739082321156 # # Found Signature_2 most similar to validated Signature_1. CoSine-Similarity: 0.782748375695661 ## ---- echo=F------------------------------------------------------------- laml.sign = structure(list(signatures = structure(c(1.08748973712201e-18, 0.00839574301030403, 1.08748973712201e-18, 1.08748973712201e-18, 0.00240385240062567, 0.0077139799108362, 1.08748973712201e-18, 1.08748973712201e-18, 0.0162307404236774, 1.08748973712201e-18, 1.08748973712201e-18, 0.00375682293486133, 0.0149882188409168, 0.0166193873319139, 1.08748973712201e-18, 0.0340641337293563, 0.0177133495392653, 1.08748973712201e-18, 1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 1.08748973712201e-18, 0.0204384802376138, 1.08748973712201e-18, 0.016350784190091, 0.00817539209504552, 0.00817539209504552, 0.00510268819463668, 4.64672034428952e-06, 1.08748973712201e-18, 0.00226222985404151, 0.0136256534917425, 0.0305962431704599, 1.08748973712201e-18, 0.0644710774331736, 0.00640610796992356, 0.0940170090930234, 0.0580805923991668, 0.0849820094561238, 1.08748973712201e-18, 0.0397128561572591, 0.0074963059243485, 0.0626642796173211, 0.0351236523951451, 0.0201728180626018, 0.014196017550947, 0.0643882909888946, 1.08748973712201e-18, 1.08748973712201e-18, 0.00455330562748865, 0.00408769604752276, 1.08748973712201e-18, 0.00272513069834851, 0.00650167581744194, 0.016350784190091, 1.08748973712201e-18, 1.08748973712201e-18, 0.0122630881425683, 1.08748973712201e-18, 0.00334016875229922, 0.00136256534917425, 1.08748973712201e-18, 0.00545026139669701, 0.00545026139669701, 4.42324620317284e-15, 1.08748973712201e-18, 0.00737413388604955, 0.0269789492628818, 0.00988463958213221, 0.0100520699314415, 1.08748973712201e-18, 0.00316631020385699, 1.08748973712201e-18, 0.010900522793394, 1.08748973712201e-18, 0.010900522793394, 0.00272513069834851, 1.08748973712201e-18, 0.00394072268462079, 0.00852966039575841, 7.59566702198884e-17, 0.0115340943434354, 1.08748973712201e-18, 0.00237636221356833, 1.08748973712201e-18, 1.08748973712201e-18, 0.0136256534917425, 0.00181567829546557, 0.00408769604752276, 1.08748973712201e-18, 1.08748973712201e-18, 0.00545026139669701, 0.00408769604752276, 0.00499540259887787, 0.00272513069834851, 0.00353514720450814, 0.0134931114940269, 0.0108267734159331, 0.00758987521539011, 0.00674655574701343, 0.0094753598393619, 0.0070321331529894, 0.0109631530888968, 0.0109631530888968, 0.00344757540744538, 0.00674655574701343, 0.00505991681026007, 0.0128545761394095, 9.04499029878464e-19, 0.00742363125585919, 0.0101198336205201, 9.04499029878464e-19, 9.04499029878464e-19, 0.00505991681026007, 0.00505991681026007, 9.04499029878464e-19, 0.0118064725572735, 0.00421659734188339, 9.04499029878464e-19, 0.00758987521539011, 9.04499029878464e-19, 9.04499029878464e-19, 9.04499029878464e-19, 0.00190175907624695, 0.00421372139194615, 0.00590323627863675, 0.00703305451506787, 9.04499029878464e-19, 0.0131095012433463, 0.0480692096974707, 0.0857521368222207, 0.0171181159082965, 9.04499029878464e-19, 0.0526012816612792, 0.103417003771369, 0.108788211420591, 0.00746704363785783, 0.0358397179449789, 0.051450983147391, 0.0119940343267555, 0.020404090976265, 0.0173566988936855, 0.0225544149118068, 0.0312028203299371, 0.00337327787350671, 0.00645838048724634, 9.04499029878464e-19, 0.00252995840513004, 9.04499029878464e-19, 0.00187921658868539, 9.04499029878464e-19, 0.00590323627863675, 0.00337327787350671, 9.04499029878464e-19, 0.00421659734188339, 0.002149298816947, 9.04499029878464e-19, 0.00168663893675336, 9.04499029878464e-19, 9.04499029878464e-19, 0.0227696256461676, 0.0118064725572735, 0.0123023875952219, 0.00691512349188281, 0.00147207029502386, 0.00642836104928948, 0.0160230698991569, 0.0132200565053105, 0.00758987521539011, 9.04499029878464e-19, 0.0109631530888968, 9.04499029878464e-19, 9.04499029878464e-19, 0.00337327787350671, 0.00430756215200655, 0.0115872086847547, 0.00337327787350667, 0.0086313879649405, 0.00168663893675336, 0.00105917939270956, 0.000843319468376679, 0.00758987521539011, 9.04499029878464e-19, 0.00309283703504524, 9.04499029878464e-19, 0.00252995840513004, 0.00421659734188339, 9.04499029878464e-19, 9.04499029878464e-19, 0.00112484084993519, 9.04499029878464e-19, 0.00287194214691947), .Dim = c(96L, 2L), .Dimnames = list(c("A[C>A]A", "A[C>A]C", "A[C>A]G", "A[C>A]T", "C[C>A]A", "C[C>A]C", "C[C>A]G", "C[C>A]T", "G[C>A]A", "G[C>A]C", "G[C>A]G", "G[C>A]T", "T[C>A]A", "T[C>A]C", "T[C>A]G", "T[C>A]T", "A[C>G]A", "A[C>G]C", "A[C>G]G", "A[C>G]T", "C[C>G]A", "C[C>G]C", "C[C>G]G", "C[C>G]T", "G[C>G]A", "G[C>G]C", "G[C>G]G", "G[C>G]T", "T[C>G]A", "T[C>G]C", "T[C>G]G", "T[C>G]T", "A[C>T]A", "A[C>T]C", "A[C>T]G", "A[C>T]T", "C[C>T]A", "C[C>T]C", "C[C>T]G", "C[C>T]T", "G[C>T]A", "G[C>T]C", "G[C>T]G", "G[C>T]T", "T[C>T]A", "T[C>T]C", "T[C>T]G", "T[C>T]T", "A[T>A]A", "A[T>A]C", "A[T>A]G", "A[T>A]T", "C[T>A]A", "C[T>A]C", "C[T>A]G", "C[T>A]T", "G[T>A]A", "G[T>A]C", "G[T>A]G", "G[T>A]T", "T[T>A]A", "T[T>A]C", "T[T>A]G", "T[T>A]T", "A[T>C]A", "A[T>C]C", "A[T>C]G", "A[T>C]T", "C[T>C]A", "C[T>C]C", "C[T>C]G", "C[T>C]T", "G[T>C]A", "G[T>C]C", "G[T>C]G", "G[T>C]T", "T[T>C]A", "T[T>C]C", "T[T>C]G", "T[T>C]T", "A[T>G]A", "A[T>G]C", "A[T>G]G", "A[T>G]T", "C[T>G]A", "C[T>G]C", "C[T>G]G", "C[T>G]T", "G[T>G]A", "G[T>G]C", "G[T>G]G", "G[T>G]T", "T[T>G]A", "T[T>G]C", "T[T>G]G", "T[T>G]T"), c("Signature_1", "Signature_2"))), coSineSimMat = structure(c(0.76789895507522, 0.757733629596811, 0.171803248681684, 0.199391522195904, 0.407671912943102, 0.372979035914154, 0.344078922420868, 0.319857408370786, 0.573357292983596, 0.562412460243176, 0.685700701802704, 0.686217358302521, 0.377725890462418, 0.386689478887272, 0.382312659188403, 0.407516946456442, 0.339149804914427, 0.305305965796845, 0.386629499233586, 0.15685755480318, 0.350678506033931, 0.562433289508901, 0.268840367435164, 0.322933955777266, 0.108666524311962, 0.0628339785033974, 0.49126593617209, 0.527932757462746, 0.47172512923794, 0.461711639647726, 0.362590079921887, 0.387794528034913, 0.154909499589746, 0.154613800740969, 0.303423806321064, 0.204479833568232, 0.570031076792535, 0.740784602225925, 0.445644443725404, 0.510768207280784, 0.248807908838572, 0.28784224944225, 0.140154287925718, 0.0725523826114571, 0.407829024906403, 0.610157444568381, 0.256945337078229, 0.227615984891259, 0.43572741633734, 0.391864627867027, 0.296855754287958, 0.345602091793204, 0.105681572106723, 0.0918011629446175, 0.0955192240249301, 0.0892005087879189, 0.35734783741945, 0.351111836488432, 0.570462074592721, 0.532907409369077), .Dim = c(2L, 30L), .Dimnames = list(c("Signature_1", "Signature_2"), c("Signature_1", "Signature_2", "Signature_3", "Signature_4", "Signature_5", "Signature_6", "Signature_7", "Signature_8", "Signature_9", "Signature_10", "Signature_11", "Signature_12", "Signature_13", "Signature_14", "Signature_15", "Signature_16", "Signature_17", "Signature_18", "Signature_19", "Signature_20", "Signature_21", "Signature_22", "Signature_23", "Signature_24", "Signature_25", "Signature_26", "Signature_27", "Signature_28", "Signature_29", "Signature_30" ))), nmfObj = NULL), .Names = c("signatures", "coSineSimMat", "nmfObj")) ## ---- fig.width=7, fig.height=5, fig.align='center', eval = T------------ plotSignatures(laml.sign) ## ------------------------------------------------------------------------ require('corrplot') corrplot::corrplot(corr = laml.sign$coSineSimMat, col = RColorBrewer::brewer.pal(n = 9, name = 'Oranges'), is.corr = FALSE, tl.cex = 0.6, tl.col = 'black', cl.cex = 0.6) ## ------------------------------------------------------------------------ var.file = system.file('extdata', 'variants.tsv', package = 'maftools') #This is what input looks like var = read.delim(var.file, sep = '\t') head(var) ## ---- results='hide', eval=F, message=F---------------------------------- # #Annotate # var.maf = oncotate(maflite = var.file, header = TRUE) ## ---- eval = F----------------------------------------------------------- # #Results from oncotate. First 20 columns. # var.maf[1:10, 1:20, with = FALSE] ## ---- eval=T------------------------------------------------------------- var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools") var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene') ## ------------------------------------------------------------------------ #Read sample ICGC data for ESCA esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools") esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE) #Printing first 16 columns for display convenience. print(esca.maf[1:5,1:16, with = FALSE]) ## ---- eval=FALSE--------------------------------------------------------- # laml.mutsig.corrected = prepareMutSig(maf = laml) # # Converting gene names for 1 variants from 1 genes # # Hugo_Symbol MutSig_Synonym N # # 1: ARHGAP35 GRLF1 1 # # Original symbols are preserved under column OG_Hugo_Symbol. ## ------------------------------------------------------------------------ ##Extract data for samples 'TCGA.AB.3009' and 'TCGA.AB.2933' (Printing just 5 rows for display convenience) subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'))[1:5] ##Same as above but return output as an MAF object subsetMaf(maf = laml, tsb = c('TCGA.AB.3009', 'TCGA.AB.2933'), mafObj = TRUE) ## ------------------------------------------------------------------------ ##Select all Splice_Site mutations from DNMT3A and NPM1 subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'") ##Same as above but include only 'i_transcript_name' column in the output subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name') ## ------------------------------------------------------------------------ sessionInfo()