## ----style, echo = FALSE, results = 'asis', warning=FALSE, message=FALSE------
BiocStyle::markdown()

suppressPackageStartupMessages({
  library(knitr)
  library(GenomicRanges)
  library(CNVMetrics)
})

set.seed(121444)

## ----graphCNVpro, echo = FALSE, fig.align="center", fig.cap="Copy number profiles of two mouse metastatic pancreatic organoids (M10 and M30).", out.width = '90%', results='asis'----

knitr::include_graphics("CNV_mM30_mM10_v03_Feb_08_2021_small.png")

## ----figureWorkflow, echo = FALSE, fig.align="center", fig.cap="General CNVMetrics workflow.", out.width = '100%'----

knitr::include_graphics("CNVMetrics_partial_workflow_v10.png")

## ----installDemo01, eval=FALSE------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#  
#  # The following initializes usage of Bioc devel
#  BiocManager::install(version='devel')
#  
#  BiocManager::install("CNVMetrics")

## ----figureCNVFile, echo = FALSE, fig.align="center", fig.cap="Example of a copy number file containing CNV calls.", out.width = '100%'----

knitr::include_graphics("Input_CNV_call_300ppi_v02_low_quality.jpg")

## ----demoImport01-------------------------------------------------------------
## Load required libraries
library(GenomicRanges)
library(CNVMetrics)

## Load file containing CNV calls for 10 mouse organoids
data.dir <- system.file("extdata", package="CNVMetrics")
cnv.file <- file.path(data.dir, "mousePairedOrganoids.txt")
calls <- read.table(cnv.file, header=TRUE, sep="\t")

## The CNV status calls for all samples are present in one file
## The 'state' column is required
## The chromosome Y has been removed
head(calls)

## The ID column identifies the 10 samples
unique(calls[,"ID"])

## The ID column is used to split the samples into different GRanges 
## inside a GRangesList
## The 'keep.extra.column=TRUE' parameter is needed to retained the extra 
## column 'state' that is needed for the calculation of the metrics
grl <- GenomicRanges::makeGRangesListFromDataFrame(calls, 
    split.field="ID", keep.extra.columns=TRUE)
grl

## ----demoCalculateMetric01----------------------------------------------------
## In this case, the default states (AMPLIFICATION, DELETION) are used. 
## So, the 'states' parameter doesn't have to be specified
## The 'states' parameter needs to be adjusted for user-specific states
## Ex: states=c("LOH", "gain")
metric <- calculateOverlapMetric(segmentData=grl, method="sorensen", nJobs=1)

metric

## ----demoPlot01, fig.align="center", fig.height=4.5, fig.width=4.5------------
## Create graph for the metrics related to amplified regions
plotMetric(metric, type="AMPLIFICATION")

## ----demoPlot02, fig.align="center", fig.height=4.8, fig.width=4.8------------
## Create graph for the metrics related to deleted regions
## Metric values are printed as 'display_numbers' and 'number_format' are
## arguments recognized by pheatmap() function
plotMetric(metric, type="DELETION", 
                    colorRange=c("white", "darkorange"),
                    show_colnames=TRUE,
                    display_numbers=TRUE,
                    number_format="%.2f")

## ----demoPlot03, fig.align="center", fig.height=4.8, fig.width=6--------------
## Load file containing annotations for the mouse organoids
## The mouse ID identifying the source of the sample
## The stage identifying the state (tumor vs metastasis) of the sample
data.dir <- system.file("extdata", package="CNVMetrics")
annotation.file <- file.path(data.dir, "mousePairedOrganoidsInfo.txt")
annotOrg <- read.table(annotation.file, header=TRUE, sep="\t")

## The row names must correspond to the names assigned to the rows/columns
## in the CNVMetric object
rownames(annotOrg) <- annotOrg$ID
annotOrg$ID <- NULL
all(rownames(annotOrg) == rownames(metric$AMPLIFICATION))

## Create graph for the metrics related to amplified regions
## Rows are annotated with the stage and mouse information
plotMetric(metric, type="AMPLIFICATION", 
                    colorRange=c("white", "steelblue"),
                    annotation_row=annotOrg)

## ----figureCNVFileLog2Ratio, echo = FALSE, fig.align="center", fig.cap="Example of a copy number file containing log2ratio values.", out.width = '100%'----

knitr::include_graphics("Input_CNV_log2ratio_v01_low_quality.jpg")

## ----demoImport02-------------------------------------------------------------
## Load required libraries
library(GenomicRanges)
library(CNVMetrics)

## Load file containing CNV calls for 10 mouse organoids
data.dir <- system.file("extdata", package="CNVMetrics")
cnv.file <- file.path(data.dir, "mousePairedOrganoids.txt")
calls <- read.table(cnv.file, header=TRUE, sep="\t")

## The CNV status calls for all samples are present in one file
## The 'log2ratio' column is required
## The chromosome Y has been removed
head(calls)

## The ID column identifies the 10 samples
unique(calls[,"ID"])

## The ID column is used to split the samples into different GRanges 
## inside a GRangesList
## The 'keep.extra.column=TRUE' parameter is needed to retained the extra 
## column 'state' that is needed for the calculation of the metrics
grlog <- GenomicRanges::makeGRangesListFromDataFrame(df=calls, 
    split.field="ID", keep.extra.columns=TRUE)
grlog

## ----demoCalculateMetricLog---------------------------------------------------
metricLog <- calculateLog2ratioMetric(segmentData=grlog, 
                              method="weightedEuclideanDistance", nJobs=1)

metricLog

## ----demoPlotLog01, fig.align="center", fig.height=4.5, fig.width=4.5---------
## Create graph for the metrics related to weighted Euclidean distance-based
plotMetric(metricLog)

## ----demoPlotLog02, fig.align="center", fig.height=4.8, fig.width=4.8---------
## Create graph for the weighted Euclidean distance-based metrics
## Remove title (argument main="")
## Metric values are printed as 'display_numbers' and 'number_format' are
## arguments recognized by pheatmap() function
plotMetric(metricLog, colorRange=c("white", "darkorange"),
                    show_colnames=TRUE,
                    display_numbers=TRUE,
                    number_format="%.2f",
                    main="")

## ----figureSimulationPart01, echo = FALSE, fig.align="center", fig.cap="Workflow showing the creation of a chromosome template from one reference sample.", out.width = '85%'----

knitr::include_graphics("Simulation_chromosome_workflow_part_01_v03.png")

## ----figureSimulationPart02, echo = FALSE, fig.align="center", fig.cap="Workflow showing the creation of a simulated chromosome through the use of a template.", out.width = '90%'----

knitr::include_graphics("Simulation_chromosome_workflow_part02_v03.png")

## ----demoSimulation-----------------------------------------------------------

## Load required package to generate the sample
require(GenomicRanges)

## Create one 'demo' genome with 3 chromosomes and few segments
## The stand of the regions doesn't affect the calculation of the metric
sampleRef <- GRanges(seqnames=c(rep("chr1", 4), rep("chr2", 3), rep("chr3", 6)),
     ranges=IRanges(start=c(1905048, 4554832, 31686841, 32686222,
         1, 120331, 725531, 12, 10331, 75531, 120001, 188331, 225531),
     end=c(2004603, 4577608, 31695808, 32689222, 117121,
         325555, 1225582, 9131, 55531, 100103, 158535, 211436, 275331)),
     strand="*",
     state=c("AMPLIFICATION", "NEUTRAL", "DELETION", "LOH",
         "DELETION", "NEUTRAL", "NEUTRAL", "NEUTRAL", "DELETION", "DELETION", 
         "NEUTRAL", "AMPLIFICATION", "NEUTRAL"),
     log2ratio=c(0.5849625, 0, -1, -1, -0.87777, 0, 0, 0.1, -0.9211, -0.9822, 
                  0.01, 0.9777, 0))

head(sampleRef)

## To ensure reproducibility, the seed must be fixed before running 
## the simulation method
set.seed(121)

## Generates 2 simulated genomes based on the 'demo' genome
## The ID column identify each simulation
simRes <- processSim(curSample=sampleRef, nbSim=3)

## Each simulated profile contains the same number of chromosomes as 
## the reference sample
head(simRes[simRes$ID == "S1",])

head(simRes[simRes$ID == "S2",])

head(simRes[simRes$ID == "S3",])

## ----demoGR-------------------------------------------------------------------
## First, create the GRanges objects; one per sample
gr1 <- GRanges(seqnames="chr2", ranges=IRanges(3, 6000),
          strand="+", state="AMPLIFICATION", 
          log2ratio=0.45)
gr2 <- GRanges(seqnames=c("chr1", "chr2"),
          ranges=IRanges(c(7,5555), width=c(1200, 40)),
          strand=c("+", "-"),  state=c("NEUTRAL", "AMPLIFICATION"), 
          log2ratio=c(0.034, 0.5))
gr3 <- GRanges(seqnames=c("chr1", "chr2"),
          ranges=IRanges(c(1, 5577), c(3, 5666)),
          strand=c("-", "-"), state=c("NEUTRAL", "AMPLIFICATION"), 
          log2ratio=c(0.04, 0.31))

## Then, construct a GRangesList() using all the GRanges objects
grl <- GRangesList("sample01"=gr1, "sample02"=gr2, "sample03"=gr3)

## ----demoSeed, eval=FALSE-----------------------------------------------------
#  ## First, fixe the seed value
#  set.seed(121234)
#  
#  ## Run the method to calculated the desired metrics
#  ## The number of jobs (*nJobs* parameter) can be higher than one but
#  ## have to remain the same then the calculation is redone to ensure
#  ## reproducitble results
#  metricLog <- calculateLog2ratioMetric(segmentData=grlog,
#                          method="weightedEuclideanDistance", nJobs=1)
#  

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()