## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE
)

## ----install, eval=FALSE------------------------------------------------------
#  # install from bioconductor
#  BiocManager::install('segmenter')
#  
#  # install from github
#  remotes::install_github('MahShaaban/segmenter@devel')

## ----load_library-------------------------------------------------------------
# load library
library(segmenter)

## ----prepare_directories, message = FALSE-------------------------------------
# locate input and annotation files
inputdir <- system.file('extdata/SAMPLEDATA_HG18',
                        package = 'segmenter')
coordsdir <- system.file('extdata/COORDS',
                         package = 'chromhmmData')
anchorsdir <- system.file('extdata/ANCHORFILES',
                          package = 'chromhmmData')
chromsizefile <- system.file('extdata/CHROMSIZES',
                             'hg18.txt',
                             package = 'chromhmmData')

## ----getting_stated-----------------------------------------------------------
# run command
obj <- learn_model(inputdir = inputdir,
                   coordsdir = coordsdir,
                   anchorsdir = anchorsdir,
                   chromsizefile = chromsizefile,
                   numstates = 3,
                   assembly = 'hg18',
                   cells = c('K562', 'GM12878'),
                   annotation = 'RefSeq',
                   binsize = 200)

## ----show---------------------------------------------------------------------
# show the object
show(obj)

## ----setup--------------------------------------------------------------------
# load required libraries
library(segmenter)
library(Gviz)
library(ComplexHeatmap)
library(TxDb.Hsapiens.UCSC.hg18.knownGene)

## ----genomic_annotations------------------------------------------------------
# coordinates
coordsdir <- system.file('extdata/COORDS',
                         package = 'chromhmmData')

list.files(file.path(coordsdir, 'hg18'))

# anchors
anchorsdir <- system.file('extdata/ANCHORFILES',
                          package = 'chromhmmData')

list.files(file.path(anchorsdir, 'hg18'))

# chromosomes' sizes
chromsizefile <- system.file('extdata/CHROMSIZES',
                             'hg18.txt',
                              package = 'chromhmmData')

readLines(chromsizefile, n = 3)

## ----cellmarkfiletable--------------------------------------------------------
# a table to assign marker and cell names to the bam files
cellmarkfiletable <- system.file('extdata',
                                 'cell_mark_table.tsv',
                                 package = 'segmenter')

readLines(cellmarkfiletable, n = 3)

## ----binary_inputs------------------------------------------------------------
# locate input and output
inputdir <- system.file("extdata", package = "bamsignals")
outputdir <- tempdir()

## ----binarize-----------------------------------------------------------------
# run command
binarize_bam(inputdir,
             chromsizefile = chromsizefile,
             cellmarkfiletable = cellmarkfiletable,
             outputdir = outputdir)

# show output files
example_binaries <- list.files(outputdir, pattern = '*_binary.txt')
example_binaries

# show the format of the binary file
readLines(file.path(outputdir, example_binaries[1]), n = 3)

## ----input_bins---------------------------------------------------------------
# locate input and output files
inputdir <- system.file('extdata/SAMPLEDATA_HG18',
                        package = 'segmenter')

list.files(inputdir)

## ----run_command--------------------------------------------------------------
# run command
obj <- learn_model(inputdir = inputdir,
                   coordsdir = coordsdir,
                   anchorsdir = anchorsdir,
                   outputdir = outputdir,
                   chromsizefile = chromsizefile,
                   numstates = 3,
                   assembly = 'hg18',
                   cells = c('K562', 'GM12878'),
                   annotation = 'RefSeq',
                   binsize = 200)

## ----methods------------------------------------------------------------------
# show the object
show(obj)

## ----accessors----------------------------------------------------------------
# access object slots
emission(obj)

## ----subset-------------------------------------------------------------------
# subset the segment slot
segment(obj, cell = 'K562')

## ----multiple_numstates-------------------------------------------------------
# relearn the models with 3 to 8 states
objs <- lapply(3:8,
    function(x) {
      learn_model(inputdir = inputdir,
                   coordsdir = coordsdir,
                   anchorsdir = anchorsdir,
                   chromsizefile = chromsizefile,
                   numstates = x,
                   assembly = 'hg18',
                   cells = c('K562', 'GM12878'),
                   annotation = 'RefSeq',
                   binsize = 200)
    })

## ----compare_numstats---------------------------------------------------------
# compare the models max correlation between the states
compare_models(objs)

## ----compare_likelihood-------------------------------------------------------
# compare the models likelihood
compare_models(objs, type = 'likelihood')

## ----plot_comparison,fig.align='center',fig.width=7,fig.height=4--------------
# compare models plots
par(mfrow = c(1, 2))
compare_models(objs,
               plot = TRUE,
               xlab = 'Model', ylab = 'State Correlation')
compare_models(objs, type = 'likelihood',
               plot = TRUE,
               xlab = 'Model', ylab = 'Likelihood')

## ----parameters---------------------------------------------------------------
# access object slots
emission(obj)
transition(obj)

## ----visulaize_matrices,fig.align='center',fig.height=3,fig.width=6-----------
# emission and transition plots
h1 <- plot_heatmap(obj,
                   row_labels = paste('S', 1:3),
                   name = 'Emission')

h2 <- plot_heatmap(obj,
                   type = 'transition',
                   row_labels = paste('S', 1:3),
                   column_labels = paste('S', 1:3),
                   name = 'Transition')
h1 + h2

## ----overlap------------------------------------------------------------------
# overlap enrichment
overlap(obj)

## ----visulaizing_overlap,fig.align='center',fig.height=3,fig.width=6----------
# overlap enrichment plots
plot_heatmap(obj,
             type = 'overlap',
             column_labels = c('Genome', 'CpG', 'Exon', 'Gene',
                               'TES', 'TSS', 'TSS2kb', 'laminB1lads'),
             show_heatmap_legend = FALSE)

## ----genomic_locations--------------------------------------------------------
# genomic locations enrichment
TSS(obj)
TES(obj)

## ----visualizing_genomic_locaitons,fig.align='center',fig.height=3,fig.width=7----
# genomic locations enrichment plots
h1 <- plot_heatmap(obj,
                   type = 'TSS',
                   show_heatmap_legend = FALSE)
h2 <- plot_heatmap(obj,
                   type = 'TES',
                   show_heatmap_legend = FALSE)

h1 + h2

## ----segments-----------------------------------------------------------------
# get segments
segment(obj)

## ----visulaize_segments,fig.align='center',fig.height=3,fig.width=3-----------
# gene gene coordinates
gen <- genes(TxDb.Hsapiens.UCSC.hg18.knownGene,
             filter = list(gene_id = 38))

# extend genomic region
prom <- promoters(gen,
                  upstream = 10000,
                  downstream = 10000)

# annotation track
segs1 <- segment(obj, 'K562')
atrack1 <- AnnotationTrack(segs1$K562,
                          group = segs1$K562$state,
                          name = 'K562')

segs2 <- segment(obj, 'GM12878')
atrack2 <- AnnotationTrack(segs2$GM12878,
                          group = segs2$GM12878$state,
                          name = 'GM12878')

# plot the track
plotTracks(atrack1, from = start(prom), to = end(prom))
plotTracks(atrack2, from = start(prom), to = end(prom))

## ----add_tracks,fig.align='center',fig.height=4,fig.width=4-------------------
# ideogram track
itrack <- IdeogramTrack(genome = 'hg18', chromosome = 11)

# genome axis track
gtrack <- GenomeAxisTrack()

# gene region track
data("geneModels")
grtrack <- GeneRegionTrack(geneModels,
                           genom = 'hg18',
                           chromosome = 11,
                           name = 'ACAT1')

# put all tracks together
plotTracks(list(itrack, gtrack, grtrack, atrack1, atrack2),
           from = min(start(prom)),
           to = max(end(gen)),
           groupAnnotation = 'group')

## ----segment_frequency--------------------------------------------------------
# get segment frequency
get_frequency(segment(obj), tidy = TRUE)

## ----plot_frequency,fig.align='center',fig.width=7,fig.height=4---------------
# frequency plots
par(mfrow=c(1, 2))
get_frequency(segment(obj),
              plot = TRUE,
              ylab = 'Segment Frequency')

get_frequency(segment(obj),
              normalize = TRUE,
              plot = TRUE,
              ylab = 'Segment Fraction')

## ----session------------------------------------------------------------------
sessionInfo()