## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("funOmics")

## ----eval=FALSE---------------------------------------------------------------
#  if (!require("devtools", quietly = TRUE))
#      install.packages("devtools")
#  
#  devtools::install_github("elisagdelope/funOmics")

## -----------------------------------------------------------------------------
library(funOmics)

## ----eval=FALSE---------------------------------------------------------------
#  ?funOmics::get_kegg_sets

## ----eval=FALSE---------------------------------------------------------------
#  ?funOmics::short_sets_detail

## -----------------------------------------------------------------------------
hsa_kegg_sets_entrez <- get_kegg_sets()
head(hsa_kegg_sets_entrez)

## -----------------------------------------------------------------------------
hsa_kegg_sets_symbol <- get_kegg_sets(geneid_type = "symbol")
hsa_kegg_sets_symbol[1]

## -----------------------------------------------------------------------------
hsa_kegg_sets_ensembl <- get_kegg_sets(geneid_type = "ensembl")
hsa_kegg_sets_ensembl[1]

## -----------------------------------------------------------------------------
ecoli_kegg_sets <- get_kegg_sets(organism = "ecj")
head(ecoli_kegg_sets)

## ----eval=FALSE---------------------------------------------------------------
#  ?funOmics::summarize_pathway_level

## -----------------------------------------------------------------------------
library(SummarizedExperiment)
library(airway)
data(airway)
airway

## -----------------------------------------------------------------------------
assayNames(airway)

## -----------------------------------------------------------------------------
head(assay(airway, "counts"))
dim(assay(airway, "counts"))

## -----------------------------------------------------------------------------
kegg_sets <- get_kegg_sets(geneid_type = "ensembl")
head(kegg_sets)

## -----------------------------------------------------------------------------
pathway_activity_pca <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "pca")

## -----------------------------------------------------------------------------
print(head(pathway_activity_pca))

## -----------------------------------------------------------------------------
print(any(assay(airway, "counts")[rowSums(assay(airway, "counts")) <0, ]))
X <- assay(airway, "counts")[rowSums(assay(airway, "counts")) >= 10, ]

## -----------------------------------------------------------------------------
pathway_activity_nmf <- summarize_pathway_level(X, kegg_sets, type = "nmf") # note that the NMF operation can take some minutes
print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity_nmf), ",", ncol(pathway_activity_nmf)))

## -----------------------------------------------------------------------------
head(pathway_activity_nmf)

## -----------------------------------------------------------------------------
library(MultiAssayExperiment)
assays_list <- list( counts = assay(airway, "counts"), kegg_nmf_agg = pathway_activity_nmf, kegg_pca_agg = pathway_activity_pca)
airwayMultiAssay <- MultiAssayExperiment(experiments=assays_list)
colData(airwayMultiAssay) <- colData(airway)
airwayMultiAssay

## -----------------------------------------------------------------------------
min <- 12
pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "mean", minsize = min)
print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity)))

## -----------------------------------------------------------------------------
print(head(pathway_activity))

## -----------------------------------------------------------------------------
short_sets <- short_sets_detail(kegg_sets, min)
print(short_sets$short_sets)

## -----------------------------------------------------------------------------
print(short_sets$short_sets_lengths)

## -----------------------------------------------------------------------------
print(short_sets$short_sets_molecules)

## -----------------------------------------------------------------------------
min <- 15
pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "sd", minsize = min)
print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity)))
head(pathway_activity)

## -----------------------------------------------------------------------------
min <- 7
pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "median", minsize = min)
print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity)))
head(pathway_activity)

## -----------------------------------------------------------------------------
pathway_activity <- summarize_pathway_level(assay(airway, "counts"), kegg_sets, type = "ttest", minsize = 15)
print(paste("Pathway activity score matrix has dimensions:", nrow(pathway_activity), ",", ncol(pathway_activity)))

## -----------------------------------------------------------------------------
print(head(pathway_activity))

## -----------------------------------------------------------------------------
goccdb <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.5/c5.go.cc.v7.5.entrez.gmt"
downdb <- sapply(readLines(goccdb), function(x) strsplit(x, "\t")[[1]])
gocc_sets <- sapply(as.matrix(downdb), function(x) x[3:length(x)])
names(gocc_sets) = sapply(as.matrix(downdb), function(x) x[1])
gocc_sets[1:3]

## -----------------------------------------------------------------------------
min <- 8
short_sets <- short_sets_detail(gocc_sets, min)
print(head(short_sets$short_sets_molecules))

## -----------------------------------------------------------------------------
# Example usage:
set.seed(1)
g <- 10000
s <- 20
X_expr <- matrix(abs(rnorm(g * s)), nrow = g, dimnames = list(1:g, paste0("s", 1:s)))
print(paste("Dimensions of omics matrix X:", dim(X_expr)[1], "*", dim(X_expr)[2]))
head(X_expr)

## -----------------------------------------------------------------------------
sd_gocc_expr <- summarize_pathway_level(X_expr, gocc_sets, type="sd", minsize=8)
head(sd_gocc_expr)

## -----------------------------------------------------------------------------
sI <- sessionInfo()
print(sI, locale = FALSE)