## ----eval=FALSE---------------------------------------------------------------
#  if (!require("BiocManager"))
#      install.packages("BiocManager")
#  BiocManager::install("maftools")

## ----loadlib, results='hide', message=FALSE-----------------------------------
library(maftools)

## ----readmaf------------------------------------------------------------------
#path to TCGA LAML MAF file
laml.maf = system.file('extdata', 'tcga_laml.maf.gz', package = 'maftools') 
#clinical information containing survival information and histology. This is optional
laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') 

laml = read.maf(maf = laml.maf, clinicalData = laml.clin)

## ----mafobject----------------------------------------------------------------
#Typing laml shows basic summary of MAF file.
laml

## ----mafsummary, eval=FALSE---------------------------------------------------
#  #Shows sample summry.
#  getSampleSummary(laml)
#  #Shows gene summary.
#  getGeneSummary(laml)
#  #shows clinical data associated with samples
#  getClinicalData(laml)
#  #Shows all fields in MAF
#  getFields(laml)
#  #Writes maf summary to an output file with basename laml.
#  write.mafSummary(maf = laml, basename = 'laml')

## ----summaryPlot,fig.height=4, fig.width=6------------------------------------
plotmafSummary(maf = laml, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

## ----oncoplot, fig.align='left',fig.height=3.5,fig.width=6, fig.align='left'----
#oncoplot for top ten mutated genes.
oncoplot(maf = laml, top = 10)

## ----titv, fig.height=3, fig.width=4.2, eval = T, fig.align='left'------------
laml.titv = titv(maf = laml, plot = FALSE, useSyn = TRUE)
#plot titv summary
plotTiTv(res = laml.titv)

## ----lollipopPlot,fig.align='left', fig.width=4.5, fig.height=2.5-------------
#lollipop plot for DNMT3A, which is one of the most frequent mutated gene in Leukemia.
lollipopPlot(
  maf = laml,
  gene = 'DNMT3A',
  AACol = 'Protein_Change',
  showMutationRate = TRUE,
  labelPos = 882
)

## ----lollipopPlot4, fig.align='left', fig.width=4.5, fig.height=2.5-----------
#example data
my_data = data.frame(pos = sample.int(912, 15, replace = TRUE), count = sample.int(30, 15, replace = TRUE))
head(my_data)
lollipopPlot(data = my_data, gene = "DNMT3A")

## ----plotProtein,fig.align='left', fig.width=5, fig.height=1.2----------------
plotProtein(gene = "TP53", refSeqID = "NM_000546")

## ----rainfallPlot, results='hide', message=FALSE------------------------------
brca <- system.file("extdata", "brca.maf.gz", package = "maftools")
brca = read.maf(maf = brca, verbose = FALSE)

## ----rainfallPlot2, fig.height=3,fig.width=6,fig.align='left'-----------------
rainfallPlot(maf = brca, detectChangePoints = TRUE, pointSize = 0.4)

## ----tcgaCompare, fig.align='left', fig.height=3.25, fig.width=6, message=FALSE, results='hide'----
laml.mutload = tcgaCompare(maf = laml, cohortName = 'Example-LAML', logscale = TRUE, capture_size = 50)

## ----plotVaf, fig.align='left', fig.height=3, fig.width=3---------------------
plotVaf(maf = laml, vafCol = 'i_TumorVAF_WU')

## ----readGistic---------------------------------------------------------------
gistic_res_folder <- system.file("extdata", package = "maftools")
laml.gistic = readGistic(gisticDir = gistic_res_folder, isTCGA = TRUE)

#GISTIC object
laml.gistic

## ----gisticChromPlot, fig.width=4, fig.height=3, fig.align='left'-------------
gisticChromPlot(gistic = laml.gistic, markBands = "all")

## ----coGisticChromPlot, fig.width=4, fig.height=5, fig.align='left'-----------
coGisticChromPlot(gistic1 = laml.gistic, gistic2 = laml.gistic, g1Name = "AML-1", g2Name = "AML-2", type = 'Amp')

## ----gisticBubblePlot, fig.width=4, fig.height=3, fig.align='left'------------
gisticBubblePlot(gistic = laml.gistic)

## ----gisticOncoPlot, fig.align='left',fig.width=5, fig.height=3, eval=T-------
gisticOncoPlot(gistic = laml.gistic, clinicalData = getClinicalData(x = laml), clinicalFeatures = 'FAB_classification', sortByAnnotation = TRUE, top = 10)

## ----segSummarize-------------------------------------------------------------
laml.seg <- system.file("extdata", "LAML_CBS_segments.tsv.gz", package = "maftools")
segSummarize_results = segSummarize(seg = laml.seg)

## ----plotCBSsegments, fig.height=2.5,fig.width=4,fig.align='left'-------------
tcga.ab.009.seg <- system.file("extdata", "TCGA.AB.3009.hg19.seg.txt", package = "maftools")
plotCBSsegments(cbsFile = tcga.ab.009.seg)

## ----somaticInteractions, message=FALSE, fig.height=5, fig.width=5------------
#exclusive/co-occurance event analysis on top 10 mutated genes. 
somaticInteractions(maf = laml, top = 25, pvalue = c(0.05, 0.1))

## ----oncodrive, fig.align='default', fig.width=7,fig.height=5, message=F,results='hide', eval=T----
laml.sig = oncodrive(maf = laml, AACol = 'Protein_Change', minMut = 5, pvalMethod = 'zscore')

## -----------------------------------------------------------------------------
head(laml.sig)

## ----plotOncodrive, fig.align='left', fig.width=3.2, fig.height=3.2-----------
plotOncodrive(res = laml.sig, fdrCutOff = 0.1, useFraction = TRUE, labelSize = 0.5)

## ----pfamDomains, fig.align='left', fig.width=4, fig.height=3-----------------
laml.pfam = pfamDomains(maf = laml, AACol = 'Protein_Change', top = 10)
#Protein summary (Printing first 7 columns for display convenience)
laml.pfam$proteinSummary[,1:7, with = FALSE]
#Domain summary (Printing first 3 columns for display convenience)
laml.pfam$domainSummary[,1:3, with = FALSE]

## ----mafSurvival, fig.width=3, fig.height=3-----------------------------------
#Survival analysis based on grouping of DNMT3A mutation status
mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', isTCGA = TRUE)

## ----survGroup----------------------------------------------------------------
#Using top 20 mutated genes to identify a set of genes (of size 2) to predict poor prognostic groups
prog_geneset = survGroup(maf = laml, top = 20, geneSetSize = 2, time = "days_to_last_followup", Status = "Overall_Survival_Status", verbose = FALSE)

print(prog_geneset)

## ----mafSurvGroup, fig.width=3, fig.height=3----------------------------------
mafSurvGroup(maf = laml, geneSet = c("DNMT3A", "FLT3"), time = "days_to_last_followup", Status = "Overall_Survival_Status")

## ----results='hide', message=FALSE--------------------------------------------
#Primary APL MAF
primary.apl = system.file("extdata", "APL_primary.maf.gz", package = "maftools")
primary.apl = read.maf(maf = primary.apl)
#Relapse APL MAF
relapse.apl = system.file("extdata", "APL_relapse.maf.gz", package = "maftools")
relapse.apl = read.maf(maf = relapse.apl)

## ----mafCompare, fig.align='left'---------------------------------------------
#Considering only genes which are mutated in at-least in 5 samples in one of the cohort to avoid bias due to genes mutated in single sample.
pt.vs.rt <- mafCompare(m1 = primary.apl, m2 = relapse.apl, m1Name = 'Primary', m2Name = 'Relapse', minMut = 5)
print(pt.vs.rt)

## ----forestPlot, fig.width=6, fig.height=4.5, fig.align='left'----------------
forestPlot(mafCompareRes = pt.vs.rt, pVal = 0.1)

## ----coOncoplot, fig.height=2.5,fig.width=6, eval=T, fig.align='left'---------
genes = c("PML", "RARA", "RUNX1", "ARID1B", "FLT3")
coOncoplot(m1 = primary.apl, m2 = relapse.apl, m1Name = 'PrimaryAPL', m2Name = 'RelapseAPL', genes = genes, removeNonMutated = TRUE)

## ----coBarplot, fig.height=3, fig.width=4-------------------------------------
coBarplot(m1 = primary.apl, m2 = relapse.apl, m1Name = "Primary", m2Name = "Relapse")

## ----lollipopPlot2, warning=FALSE, message=FALSE,fig.align='left', results='hide', fig.height=3.5, fig.width=5----
lollipopPlot2(m1 = primary.apl, m2 = relapse.apl, gene = "PML", AACol1 = "amino_acid_change", AACol2 = "amino_acid_change", m1_name = "Primary", m2_name = "Relapse")

## ----clinicalEnrichment-------------------------------------------------------
fab.ce = clinicalEnrichment(maf = laml, clinicalFeature = 'FAB_classification')

#Results are returned as a list. Significant associations p-value < 0.05
fab.ce$groupwise_comparision[p_value < 0.05]

## ----plotEnrichmentResults, fig.width=4, fig.height=3-------------------------
plotEnrichmentResults(enrich_res = fab.ce, pVal = 0.05, geneFontSize = 0.5, annoFontSize = 0.6)

## ----drugInteractions, fig.height=3, fig.width=5------------------------------
dgi = drugInteractions(maf = laml, fontSize = 0.75)

## ----drugInteractions2--------------------------------------------------------
dnmt3a.dgi = drugInteractions(genes = "DNMT3A", drugs = TRUE)
#Printing selected columns.
dnmt3a.dgi[,.(Gene, interaction_types, drug_name, drug_claim_name)]

## ----OncogenicPathways, fig.width=7, fig.height=6-----------------------------
pws = pathways(maf = laml, plotType = 'treemap')

## ----PlotOncogenicPathways, fig.width=6, fig.height=3.5-----------------------
plotPathways(maf = laml, pathlist = pws)

## ----inferHeterogeneity, echo = TRUE, fig.align='left', fig.height=3.5, fig.width=4, eval=T----
#Heterogeneity in sample TCGA.AB.2972
library("mclust")
tcga.ab.2972.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-2972', vafCol = 'i_TumorVAF_WU')
print(tcga.ab.2972.het$clusterMeans)
#Visualizing results
plotClusters(clusters = tcga.ab.2972.het)

## ----plotClusters, fig.align='left', fig.height=3.5, fig.width=4, eval=T------
seg = system.file('extdata', 'TCGA.AB.3009.hg19.seg.txt', package = 'maftools')
tcga.ab.3009.het = inferHeterogeneity(maf = laml, tsb = 'TCGA-AB-3009', segFile = seg, vafCol = 'i_TumorVAF_WU')
#Visualizing results. Highlighting those variants on copynumber altered variants.
plotClusters(clusters = tcga.ab.3009.het, genes = 'CN_altered', showCNvars = TRUE)

## ----trinucleotideMatrix, eval=TRUE-------------------------------------------
#Requires BSgenome object
library("BSgenome.Hsapiens.UCSC.hg19", quietly = TRUE)
laml.tnm = trinucleotideMatrix(maf = laml, prefix = 'chr', add = TRUE, ref_genome = "BSgenome.Hsapiens.UCSC.hg19")

## ----plotApobecDiff, eval=TRUE, fig.height=3, fig.width=5---------------------
plotApobecDiff(tnm = laml.tnm, maf = laml, pVal = 0.2)

## ----echo=FALSE---------------------------------------------------------------
par(mar = c(2, 2, 2, 1))
plot(NA, xlim = c(1, 10), ylim = c(0, 30), frame.plot = FALSE, axes = FALSE, xlab = NA, ylab = NA)
rect(xleft = 3, ybottom = 28, xright = 7, ytop = 30, col = grDevices::adjustcolor("gray70", alpha.f = 0.6), lwd = 1.2, border = "maroon")
text(x = 5, y = 29, labels = "MAF", font = 2)
arrows(x0 = 5, y0 = 28, x1 = 5, y1 = 26, length = 0.1, lwd = 2)
text(x = 5, y = 25, labels = "trinucleotideMatrix()", font = 3)
arrows(x0 = 5, y0 = 24, x1 = 5, y1 = 21, length = 0.1, lwd = 2)
text(x = 5, y = 20, labels = "estimateSignatures()", font = 3)
arrows(x0 = 5, y0 = 19, x1 = 5, y1 = 16, length = 0.1, lwd = 2)
text(x = 5, y = 15, labels = "plotCophenetic()", font = 3)
arrows(x0 = 5, y0 = 14, x1 = 5, y1 = 11, length = 0.1, lwd = 2)
text(x = 5, y = 10, labels = "extractSignatures()", font = 3)
arrows(x0 = 5, y0 = 9, x1 = 5, y1 = 6, length = 0.1, lwd = 2)
text(x = 5, y = 5, labels = "compareSignatures()", font = 3)
arrows(x0 = 5, y0 = 4, x1 = 5, y1 = 1, length = 0.1, lwd = 2)
text(x = 5, y = 0, labels = "plotSignatures()", font = 3)

## ----estimateSignatures, fig.height=5, fig.width=5, eval=FALSE, message=FALSE----
#  library('NMF')
#  laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6)

## ----estimateSignatures2, fig.height=3, fig.width=3, eval=TRUE, message=FALSE, echo=FALSE, include=FALSE----
#Run main function with maximum 6 signatures. 
library('NMF')
laml.sign = estimateSignatures(mat = laml.tnm, nTry = 6, pConstant = 0.1, plotBestFitRes = FALSE, parallel = 2)

## ----plotCophenetic, fig.width=3, fig.height=3, eval=TRUE---------------------
plotCophenetic(res = laml.sign)

## ----extractSignatures, eval=FALSE--------------------------------------------
#  laml.sig = extractSignatures(mat = laml.tnm, n = 3)

## ----extractSignatures2, eval=TRUE, echo=FALSE--------------------------------
laml.sig = extractSignatures(mat = laml.tnm, n = 3, pConstant = 0.1,  parallel = 2)

## ----compareSignatures, eval=TRUE---------------------------------------------
#Compate against original 30 signatures 
laml.og30.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "legacy")
#Compate against updated version3 60 signatures 
laml.v3.cosm = compareSignatures(nmfRes = laml.sig, sig_db = "SBS")

## ----pheatmap, fig.width=7, fig.height=2.5, fig.align='center', eval=TRUE-----
library('pheatmap')
pheatmap::pheatmap(mat = laml.og30.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")

## ----plotSignatures, fig.width=6, fig.height=4, fig.align='center', eval = TRUE----
maftools::plotSignatures(nmfRes = laml.sig, title_size = 1.2, sig_db = "SBS")

## ----legoplot3d, eval=FALSE---------------------------------------------------
#  library("barplot3d")
#  #Visualize first signature
#  sig1 = laml.sig$signatures[,1]
#  barplot3d::legoplot3d(contextdata = sig1, labels = FALSE, scalexy = 0.01, sixcolors = "sanger", alpha = 0.5)

## ----annovarToMaf, eval=TRUE--------------------------------------------------
var.annovar = system.file("extdata", "variants.hg19_multianno.txt", package = "maftools")
var.annovar.maf = annovarToMaf(annovar = var.annovar, Center = 'CSI-NUS', refBuild = 'hg19', 
                               tsbCol = 'Tumor_Sample_Barcode', table = 'ensGene')


## ----icgcSimpleMutationToMAF--------------------------------------------------
#Read sample ICGC data for ESCA
esca.icgc <- system.file("extdata", "simple_somatic_mutation.open.ESCA-CN.sample.tsv.gz", package = "maftools")
esca.maf <- icgcSimpleMutationToMAF(icgc = esca.icgc, addHugoSymbol = TRUE)
#Printing first 16 columns for display convenience.
print(esca.maf[1:5,1:16, with = FALSE])

## ----prepareMutSig, eval=FALSE------------------------------------------------
#  laml.mutsig.corrected = prepareMutSig(maf = laml)
#  # Converting gene names for 1 variants from 1 genes
#  #    Hugo_Symbol MutSig_Synonym N
#  # 1:    ARHGAP35          GRLF1 1
#  # Original symbols are preserved under column OG_Hugo_Symbol.

## ----subsetMaf----------------------------------------------------------------
#Extract data for samples 'TCGA.AB.3009' and 'TCGA.AB.2933'  (Printing just 5 rows for display convenience)
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'), mafObj = FALSE)[1:5]
##Same as above but return output as an MAF object (Default behaviour)
subsetMaf(maf = laml, tsb = c('TCGA-AB-3009', 'TCGA-AB-2933'))

## ----subsetMaf2---------------------------------------------------------------
#Select all Splice_Site mutations from DNMT3A and NPM1
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE,query = "Variant_Classification == 'Splice_Site'")

#Same as above but include only 'i_transcript_name' column in the output
subsetMaf(maf = laml, genes = c('DNMT3A', 'NPM1'), mafObj = FALSE, query = "Variant_Classification == 'Splice_Site'", fields = 'i_transcript_name')

## ----subsetMaf3---------------------------------------------------------------
#Select all samples with FAB clasification M4 in clinical data 
laml_m4 = subsetMaf(maf = laml, clinQuery = "FAB_classification %in% 'M4'")

## ----sampleSwaps, eval=FALSE--------------------------------------------------
#  #Path to BAM files
#  bams = c(
#    "DBW-40-N.bam",
#    "DBW-40-1T.bam",
#    "DBW-40-2T.bam",
#    "DBW-40-3T.bam",
#    "DBW-43-N.bam",
#    "DBW-43-1T.bam"
#  )
#  
#  res = maftools::sampleSwaps(bams = bams, build = "hg19")
#  # Fetching readcounts from BAM files..
#  # Summarizing allele frequncy table..
#  # Performing pairwise comparison..
#  # Done!

## ----pairwise_comparison, eval=FALSE------------------------------------------
#   res$pairwise_comparison

## -----------------------------------------------------------------------------
# X_bam     Y_bam concordant_snps discordant_snps fract_concordant_snps  cor_coef XY_possibly_paired
#  1: DBW-40-1T DBW-40-2T            5488             571             0.9057600 0.9656484                Yes
#  2: DBW-40-1T DBW-40-3T            5793             266             0.9560984 0.9758083                Yes
#  3: DBW-40-1T  DBW-43-N            5534             525             0.9133520 0.9667620                Yes
#  4: DBW-40-2T DBW-40-3T            5853             206             0.9660010 0.9817475                Yes
#  5: DBW-40-2T  DBW-43-N            5131             928             0.8468394 0.9297096                Yes
#  6: DBW-40-3T  DBW-43-N            5334             725             0.8803433 0.9550670                Yes
#  7:  DBW-40-N DBW-43-1T            5709             350             0.9422347 0.9725684                Yes
#  8: DBW-40-1T  DBW-40-N            2829            3230             0.4669087 0.3808831                 No
#  9: DBW-40-1T DBW-43-1T            2796            3263             0.4614623 0.3755364                 No
# 10: DBW-40-2T  DBW-40-N            2760            3299             0.4555207 0.3641647                 No
# 11: DBW-40-2T DBW-43-1T            2736            3323             0.4515597 0.3579747                 No
# 12: DBW-40-3T  DBW-40-N            2775            3284             0.4579964 0.3770581                 No
# 13: DBW-40-3T DBW-43-1T            2753            3306             0.4543654 0.3721022                 No
# 14:  DBW-40-N  DBW-43-N            2965            3094             0.4893547 0.3839140                 No
# 15: DBW-43-1T  DBW-43-N            2876            3183             0.4746658 0.3797829                 No

## ----BAM_matches, eval=FALSE--------------------------------------------------
#  res$BAM_matches

## -----------------------------------------------------------------------------
# [[1]]
# [1] "DBW-40-1T" "DBW-40-2T" "DBW-40-3T" "DBW-43-N" 
# 
# [[2]]
# [1] "DBW-40-2T" "DBW-40-3T" "DBW-43-N" 
# 
# [[3]]
# [1] "DBW-40-3T" "DBW-43-N" 
# 
# [[4]]
# [1] "DBW-40-N"  "DBW-43-1T"

## ----cor, eval=FALSE----------------------------------------------------------
#  cor_table = cor(res$AF_table)

## ----cortable, echo=FALSE-----------------------------------------------------
cor_table = structure(c(1, 0.971671361359591, 0.982608032160979, 0.381966662753787, 
0.380812832617918, 0.97246945978859, 0.971671361359591, 1, 0.988268712494756, 
0.365843547670381, 0.364768267525972, 0.933880555972565, 0.982608032160979, 
0.988268712494756, 1, 0.376165783421095, 0.375700923176265, 0.959717559987264, 
0.381966662753787, 0.365843547670381, 0.376165783421095, 1, 0.979259788301874, 
0.385257303482557, 0.380812832617918, 0.364768267525972, 0.375700923176265, 
0.979259788301874, 1, 0.384975386482912, 0.97246945978859, 0.933880555972565, 
0.959717559987264, 0.385257303482557, 0.384975386482912, 1), .Dim = c(6L, 
6L), .Dimnames = list(c("DBW-40-1T", "DBW-40-2T", "DBW-40-3T", 
"DBW-40-N", "DBW-43-1T", "DBW-43-N"), c("DBW-40-1T", "DBW-40-2T", 
"DBW-40-3T", "DBW-40-N", "DBW-43-1T", "DBW-43-N")))

## ----pheatmap2, fig.width=6, fig.height=4-------------------------------------
pheatmap::pheatmap(cor_table, breaks = seq(0, 1, 0.01))

## ----tcgaAvailable------------------------------------------------------------
tcga_avail = tcgaAvailable()
head(tcga_avail, 3)

## ----tcgaLoad-----------------------------------------------------------------
# By default MAF from MC3 project will be loaded
laml_mc3 = tcgaLoad(study = "LAML")
laml_mc3

# Change the source to Firehose
laml_fh = tcgaLoad(study = "LAML", source = "Firehose")
laml_fh

## ----eval=FALSE---------------------------------------------------------------
#  BiocManager::install(pkgs = "PoisonAlien/TCGAmutations")

## ----maf2mae------------------------------------------------------------------
laml_mae = maf2mae(m = laml)
laml_mae

## -----------------------------------------------------------------------------
sessionInfo()