## ----load package--------------------------------------------------------
library(cTRAP)

## ----load ENCODE samples, eval=FALSE-------------------------------------
#  gene <- "EIF4G1"
#  cellLine <- "HepG2"
#  
#  ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene)
#  table(ENCODEmetadata$`Experiment target`)
#  length(unique(ENCODEmetadata$`Experiment target`))
#  
#  ENCODEsamples <- loadENCODEsamples(ENCODEmetadata)[[1]]
#  counts <- prepareENCODEgeneExpression(ENCODEsamples)

## ----load data, include=FALSE--------------------------------------------
data("ENCODEmetadata")
data("counts")

## ----differential gene expression, eval=FALSE----------------------------
#  # Remove low coverage (at least 10 counts shared across two samples)
#  minReads   <- 10
#  minSamples <- 2
#  filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples
#  counts <- counts[filter, ]
#  
#  # Convert ENSEMBL identifier to gene symbol
#  counts$gene_id <- convertENSEMBLtoGeneSymbols(counts$gene_id)
#  
#  # Perform differential gene expression analysis
#  diffExpr <- performDifferentialExpression(counts)

## ----t-statistics, eval=FALSE--------------------------------------------
#  # Get t-statistics of differential expression with respective gene names
#  # (expected input for downstream analyses)
#  diffExprStat <- diffExpr$t
#  names(diffExprStat) <- diffExpr$Gene_symbol

## ------------------------------------------------------------------------
data("diffExprStat")

## ----CMap metadata conditions--------------------------------------------
# Download CMap metadata (if not found in current directory)
cmapMetadata <- loadCMapData("cmapMetadata.txt", type="metadata")

# Summarise conditions for all CMap perturbations
getCMapConditions(cmapMetadata)

# Summarise conditions for CMap perturbations in HepG2 cell line
getCMapConditions(cmapMetadata, cellLine="HepG2")

# Summarise conditions for a specific CMap perturbation in HepG2 cell line
getCMapConditions(
    cmapMetadata, cellLine="HepG2",
    perturbationType="Consensus signature from shRNAs targeting the same gene")

## ----CMap knockdown perturbations, eval=FALSE----------------------------
#  # Filter CMap gene knockdown HepG2 data to load afterwards
#  cmapMetadataKD <- filterCMapMetadata(
#      cmapMetadata, cellLine="HepG2",
#      perturbationType="Consensus signature from shRNAs targeting the same gene")
#  
#  # Load filtered data (if data is not found, it will be downloaded)
#  cmapPerturbationsKD <- prepareCMapPerturbations(
#      metadata=cmapMetadataKD, zscores="cmapZscores.gctx",
#      geneInfo="cmapGeneInfo.txt")

## ----CMap small molecule perturbations, eval=FALSE-----------------------
#  # Filter CMap gene small molecule HepG2 data to load afterwards
#  cmapMetadataCompounds <- filterCMapMetadata(
#      cmapMetadata, cellLine="HepG2", perturbationType="Compound")
#  
#  # Load filtered data (if data is not found, it will be downloaded)
#  cmapPerturbationsCompounds <- prepareCMapPerturbations(
#      metadata=cmapMetadataCompounds, zscores="cmapZscores.gctx",
#      geneInfo="cmapGeneInfo.txt", compoundInfo="cmapCompoundInfo.txt")

## ----load CMap perturbations, include=FALSE------------------------------
data("cmapPerturbationsKD")
data("cmapPerturbationsCompounds")
cmapPerturbationsCompounds <- cmapPerturbationsCompounds[
    , grep("HEPG2", colnames(cmapPerturbationsCompounds))]

## ----compare knockdown, message=FALSE------------------------------------
compareKD <- rankSimilarPerturbations(diffExprStat, cmapPerturbationsKD)

## ----compare small molecules, message=FALSE------------------------------
compareCompounds <- rankSimilarPerturbations(diffExprStat, 
                                             cmapPerturbationsCompounds)

## ----order knockdown perturbation comparison, fig.width=6, fig.asp=1-----
# Most positively associated perturbations (note that EIF4G1 knockdown is the
# 7th, 1st and 2nd most positively associated perturbation based on Spearman's
# correlation, Pearson's correlation and GSEA, respectively)
head(compareKD[order(spearman_rank, na.last=NA)], n=10)
head(compareKD[order(pearson_rank, na.last=NA)])
head(compareKD[order(GSEA_rank, na.last=NA)])
head(compareKD[order(rankProduct_rank, na.last=NA)])

# Most negatively associated perturbations
tail(compareKD[order(spearman_rank, na.last=NA)])
tail(compareKD[order(pearson_rank, na.last=NA)])
tail(compareKD[order(GSEA_rank, na.last=NA)])
tail(compareKD[order(rankProduct_rank, na.last=NA)])

# Plot list of compared pertubations
plot(compareKD, "spearman", n=c(10, 3))
plot(compareKD, "pearson")
plot(compareKD, "gsea")
plot(compareKD, "rankProduct")

## ----order small molecule perturbation comparison, fig.width=6, fig.asp=1----
# Most positively and negatively associated perturbations
compareCompounds[order(rankProduct_rank, na.last=NA)]
plot(compareCompounds, "rankProduct")

## ----perturbation info---------------------------------------------------
# Information on the EIF4G1 knockdown perturbation
EIF4G1knockdown <- grep("EIF4G1", compareKD[[1]], value=TRUE)
print(compareKD, EIF4G1knockdown)

# Information on the most similar compound perturbation (based on Spearman's 
# correlation)
print(compareCompounds[order(rankProduct_rank, na.last=NA)], 1)

## ----plot knockdown comparison, message=FALSE, fig.width=5, fig.asp=1----
# Plot relationship with EIF4G1 knockdown from CMap
plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, "spearman")
plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, "pearson")
plot(cmapPerturbationsKD, EIF4G1knockdown, diffExprStat, "gsea")

# Plot relationship with most negatively associated perturbation
plot(cmapPerturbationsKD,
     tail(compareKD[order(spearman_rank, na.last=NA)], 1),
     diffExprStat, "spearman")
plot(cmapPerturbationsKD,
     tail(compareKD[order(pearson_rank, na.last=NA)], 1),
     diffExprStat, "pearson")
plot(cmapPerturbationsKD,
     tail(compareKD[order(GSEA_rank, na.last=NA)], 1),
     diffExprStat, "gsea")

## ----plot small molecule comparison, message=FALSE, fig.width=5, fig.asp=1----
# Plot relationship with most positively associated perturbation
plot(cmapPerturbationsCompounds, 
     head(compareCompounds[order(spearman_rank, na.last=NA)], 1), 
     diffExprStat, "spearman")
plot(cmapPerturbationsCompounds, 
     head(compareCompounds[order(pearson_rank, na.last=NA)], 1), 
     diffExprStat,"pearson")
plot(cmapPerturbationsCompounds, 
     head(compareCompounds[order(GSEA_rank, na.last=NA)], 1), 
     diffExprStat, "gsea")

# Plot relationship with most negatively associated perturbation
plot(cmapPerturbationsCompounds,
     tail(compareCompounds[order(spearman_rank, na.last=NA)], 1),
     diffExprStat, "spearman")
plot(cmapPerturbationsCompounds, 
     tail(compareCompounds[order(pearson_rank, na.last=NA)], 1), 
     diffExprStat, "pearson")
plot(cmapPerturbationsCompounds, 
     tail(compareCompounds[order(GSEA_rank, na.last=NA)], 1), 
     diffExprStat, "gsea")

## ------------------------------------------------------------------------
gdsc      <- loadExpressionDrugSensitivityAssociation("CTRP 2.1")
predicted <- predictTargetingDrugs(diffExprStat, gdsc)
plot(predicted, method="rankProduct")

## ------------------------------------------------------------------------
# Label by compound name
plotTargetingDrugsVSsimilarPerturbations(
  predicted, compareCompounds, column="spearman_rank")
# Label by compound's gene target
plotTargetingDrugsVSsimilarPerturbations(
  predicted, compareCompounds, column="spearman_rank", labelBy="target")
# Label by compound's mechanism of action (MOA)
plotTargetingDrugsVSsimilarPerturbations(
  predicted, compareCompounds, column="spearman_rank", labelBy="moa")

## ----drug set enrichment analysis, fig.width=5, fig.asp=1----------------
descriptors <- loadDrugDescriptors("CMap", "2D")
drugSets    <- prepareDrugSets(descriptors)
dsea        <- analyseDrugSetEnrichment(drugSets, predicted)
# Plot the 6 most significant drug set enrichment results
plotDrugSetEnrichment(drugSets, predicted, selectedSets=head(dsea$pathway, 6))