## ----fig1, fig.cap="Overview of the resources featured in OmniPath. Causal resources (including activity-flow and enzyme-substrate resources) can provide direction (*) or sign and direction (+) of interactions.", echo=FALSE----
library(knitr)
knitr::include_graphics("man/figures/page1_1.png")

## ----installation, eval=FALSE-------------------------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("OmnipathR")

## ----libraries, message=FALSE-------------------------------------------------------------------------------
library(OmnipathR)
library(igraph)
library(tidyr)
library(gprofiler2)

## ----interactions-------------------------------------------------------------------------------------------
## We check some of the different interaction databases
interaction_resources()

## The interactions are stored into a data frame.
pathways <- omnipath(resources = c("SignaLink3", "PhosphoSite", "SIGNOR"))

## We visualize the first interactions in the data frame.
print_interactions(head(pathways))

## ----sp, message=TRUE---------------------------------------------------------------------------------------
## We transform the interactions data frame into a graph
pathways_g <- interaction_graph(pathways)

## Find and print shortest paths on the directed network between proteins
## of interest:
print_path_es(
    igraph::shortest_paths(
        pathways_g,
        from = "TYRO3",
        to = "STAT3",
        output = "epath"
    )$epath[[1]],
    pathways_g
)

## Find and print all shortest paths between proteins of interest:
print_path_vs(
    igraph::all_shortest_paths(
        pathways_g,
        from = "DYRK2",
        to = "MAPKAPK2"
    )$res,
    pathways_g
)

## ----clustering, message=FALSE------------------------------------------------------------------------------
## We apply a clustering algorithm (Louvain) to group proteins in
## our network. We apply here Louvain which is fast but can only run
## on undirected graphs. Other clustering algorithms can deal with
## directed networks but with longer computational times,
## such as cluster_edge_betweenness. These cluster methods are directly
## available in the igraph package.
pathways_g_u <- igraph::as.undirected(pathways_g, mode = "mutual")
pathways_g_u <- igraph::simplify(pathways_g_u)
clusters <- igraph::cluster_fast_greedy(pathways_g_u)
## We extract the cluster where a protein of interest is contained
erbb2_cluster_id <- clusters$membership[which(clusters$names == "ERBB2")]
erbb2_cluster_g <- igraph::induced_subgraph(
    pathways_g_u,
    igraph::V(pathways_g)$name[which(clusters$membership == erbb2_cluster_id)]
)

## ----fig2, echo = FALSE, fig.cap="ERBB2 associated cluser. Subnetwork extracted from the interactions graph representing the cluster where we can find the gene *ERBB2* (yellow node)"----
## We print that cluster with its interactions.
par(mar = c(0.1, 0.1, 0.1, 0.1))
plot(
    erbb2_cluster_g,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 15,
    edge.curved = 0.2,
    vertex.color = ifelse(
        igraph::V(erbb2_cluster_g)$name == "ERBB2",
        "yellow",
        "#00CCFF"
    ),
    edge.color = "blue",
    edge.width = 0.8
)

## ----pathwayextra-------------------------------------------------------------------------------------------
## We query and store the interactions into a dataframe
iactions <-
    pathwayextra(
        resources = c("Wang", "Lit-BM-17"),
        organism = 10090  # mouse
    )

## We select all the interactions in which Amfr gene is involved
iactions_amfr <- dplyr::filter(
    iactions,
    source_genesymbol == "Amfr" |
    target_genesymbol == "Amfr"
)

## We print these interactions:
print_interactions(iactions_amfr)

## ----kinaseextra--------------------------------------------------------------------------------------------
## We query and store the interactions into a dataframe
phosphonetw <-
    kinaseextra(
        resources = c("PhosphoPoint", "PhosphoSite"),
        organism = 10116  # rat
    )

## We select the interactions in which Dpysl2 gene is a target
upstream_dpysl2 <- dplyr::filter(
    phosphonetw,
    target_genesymbol == "Dpysl2"
)

## We print these interactions:
print_interactions(upstream_dpysl2)

## ----ligrecextra--------------------------------------------------------------------------------------------
## We query and store the interactions into a dataframe
ligrec_netw <- ligrecextra(
    resources = c("iTALK", "Baccin2019"),
    organism = 9606  # human
)

## Receptors of the CDH1 ligand.
downstream_cdh1 <- dplyr::filter(
    ligrec_netw,
    source_genesymbol == "CDH1"
)

## We transform the interactions data frame into a graph
downstream_cdh1_g <- interaction_graph(downstream_cdh1)

## We induce a network with these genes
downstream_cdh1_g <- igraph::induced_subgraph(
    interaction_graph(omnipath()),
    igraph::V(downstream_cdh1_g)$name
)

## ----fig3, echo = FALSE, fig.cap="Ligand-receptor interactions for the ADM2 ligand."------------------------
## We print the induced network
par(mar = c(0.1, 0.1, 0.1, 0.1))
plot(
    downstream_cdh1_g,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 20,
    edge.curved = 0.2,
    vertex.color = ifelse(
        igraph::V(downstream_cdh1_g)$name == "CDH1",
        "yellow",
        "#00CCFF"
    ),
    edge.color = "blue",
    edge.width = 0.8
)

## ----dorothea-----------------------------------------------------------------------------------------------
## We query and store the interactions into a dataframe
dorothea_netw <- dorothea(
    dorothea_levels = "A",
    organism = 9606
)

## We select the most confident interactions for a given TF and we print
## the interactions to check the way it regulates its different targets
downstream_gli1  <- dplyr::filter(
    dorothea_netw,
    source_genesymbol == "GLI1"
)

print_interactions(downstream_gli1)

## ----mirnatarget--------------------------------------------------------------------------------------------
## We query and store the interactions into a dataframe
mirna_targets <- mirna_target(
    resources = c("miR2Disease", "miRDeathDB")
)

## We select the interactions where a miRNA is interacting with the TF
## used in the previous code chunk and we print these interactions.
upstream_gli1 <- dplyr::filter(
    mirna_targets,
    target_genesymbol == "GLI1"
)

print_interactions(upstream_gli1)

## We transform the previous selections to graphs (igraph objects)
downstream_gli1_g <- interaction_graph(downstream_gli1)
upstream_gli1_g <- interaction_graph(upstream_gli1)

## ----fig4, echo = FALSE, fig.cap="miRNA-TF-target network. Schematic network of the miRNA (red square nodes) targeting \textit{GLI1} (yellow node) and the genes regulated by this TF (blue round nodes)."----
## We print the union of both previous graphs
par(mar = c(0.1, 0.1, 0.1, 0.1))

gli1_g <- upstream_gli1_g %u% downstream_gli1_g

plot(
    gli1_g,
    vertex.label.color = "black",
    vertex.frame.color = "#ffffff",
    vertex.size = 20,
    edge.curved = 0.25,
    vertex.color = ifelse(
        grepl("miR", igraph::V(gli1_g)$name),
        "red",
        ifelse(
            igraph::V(gli1_g)$name == "GLI1",
            "yellow",
            "#00CCFF"
        )
    ),
    edge.color = "blue",
    vertex.shape = ifelse(
        grepl("miR",igraph::V(gli1_g)$name),
        "vrectangle",
        "circle"
    ),
    edge.width = 0.8
)

## ----small-molecules----------------------------------------------------------------------------------------
trametinib_targets <- small_molecule(sources = "TRAMETINIB")
print_interactions(trametinib_targets)

## ----PTMs---------------------------------------------------------------------------------------------------
## We check the different PTMs databases
enzsub_resources()

## We query and store the enzyme-PTM interactions into a dataframe.
## No filtering by databases in this case.
enzsub <- enzyme_substrate()

## We can select and print the reactions between a specific kinase and
## a specific substrate
print_interactions(dplyr::filter(
    enzsub,
    enzyme_genesymbol == "MAP2K1",
    substrate_genesymbol == "MAPK3"
))

## In the previous results, we can see that enzyme-PTM relationships do not
## contain sign (activation/inhibition). We can generate this information
## based on the protein-protein OmniPath interaction dataset.
interactions <- omnipath()
enzsub <- signed_ptms(enzsub, interactions)

## We select again the same kinase and substrate. Now we have information
## about inhibition or activation when we print the enzyme-PTM relationships
print_interactions(
    dplyr::filter(
        enzsub,
        enzyme_genesymbol=="MAP2K1",
        substrate_genesymbol=="MAPK3"
    )
)

## We can also transform the enzyme-PTM relationships into a graph.
enzsub_g <- enzsub_graph(enzsub = enzsub)

## We download PTMs for mouse
enzsub <- enzyme_substrate(
    resources = c("PhosphoSite", "SIGNOR"),
    organism = 10090
)

## ----complexes----------------------------------------------------------------------------------------------
## We check the different complexes databases
complex_resources()

## We query and store complexes from some sources into a dataframe.
the_complexes <- complexes(resources = c("CORUM", "hu.MAP"))

## We check all the molecular complexes where a set of genes participate
query_genes <- c("WRN", "PARP1")

## Complexes where any of the input genes participate
wrn_parp1_complexes <- unique(
    complex_genes(
        the_complexes,
        genes = query_genes,
        all_genes = FALSE
    )
)

## We print the components of the different selected components
head(wrn_parp1_complexes$components_genesymbols, 6)

## Complexes where all the input genes participate jointly
complexes_query_genes_join <- unique(
    complex_genes(
        the_complexes,
        genes = query_genes,
        all_genes = TRUE
    )
)

## We print the components of the different selected components
complexes_query_genes_join$components_genesymbols

## ----enrichment---------------------------------------------------------------------------------------------
wrn_parp1_cplx_genes <- unlist(
    strsplit(wrn_parp1_complexes$components_genesymbols, "_")
)

## We can perform an enrichment analyses with the genes in the complex
enrichment <- gprofiler2::gost(
    wrn_parp1_cplx_genes,
    significant = TRUE,
    user_threshold = 0.001,
    correction_method = "fdr",
    sources = c("GO:BP", "GO:CC", "GO:MF")
)

## We show the most significant results
enrichment$result %>%
    dplyr::select(term_id, source, term_name, p_value) %>%
    dplyr::top_n(5, -p_value)

## ----complex_annotations------------------------------------------------------------------------------------
## We check the different annotation databases
annotation_resources()

## We can further investigate the features of the complex selected
## in the previous section.

## We first get the annotations of the complex itself:
cplx_annot <- annotations(
    proteins = paste0("COMPLEX:", wrn_parp1_complexes$components_genesymbols)
)

head(dplyr::select(cplx_annot, source, label, value), 10)

## ----annotations_components---------------------------------------------------------------------------------
comp_annot <- annotations(
    proteins = wrn_parp1_cplx_genes,
    resources = "NetPath"
)

dplyr::select(comp_annot, genesymbol, value)

## ----subcell_loc--------------------------------------------------------------------------------------------
subcell_loc <- annotations(
    proteins = wrn_parp1_cplx_genes,
    resources = "ComPPI"
)

## ----annot_spread-------------------------------------------------------------------------------------------
tidyr::spread(subcell_loc, label, value) %>%
dplyr::arrange(desc(score)) %>%
dplyr::top_n(10, score)

## ----annot_wide---------------------------------------------------------------------------------------------
signalink_pathways <- annotations(
    resources = "SignaLink_pathway",
    wide = TRUE
)

## ----intercell----------------------------------------------------------------------------------------------
## We check some of the different intercell categories
intercell_generic_categories()

## We import the intercell data into a dataframe
intercell_loc <- intercell(
    scope = "generic",
    aspect = "locational"
)

## We check the intercell annotations for the individual components of
## our previous complex. We filter our data to print it in a good format
dplyr::filter(intercell_loc, genesymbol %in% wrn_parp1_cplx_genes) %>%
dplyr::distinct(genesymbol, parent, .keep_all = TRUE) %>%
dplyr::select(category, genesymbol, parent) %>%
dplyr::arrange(genesymbol)

## ----intercell_quality--------------------------------------------------------------------------------------
icn <- intercell_network(high_confidence = TRUE)

## ----intercell_filter---------------------------------------------------------------------------------------
icn <-
    intercell_network() %>%
    filter_intercell_network(
        min_curation_effort = 1,
        consensus_percentile = 33
    )

## ----close_dev----------------------------------------------------------------------------------------------
## We close graphical connections
while (!is.null(dev.list()))  dev.off()

## ----sessionInfo, echo=FALSE--------------------------------------------------------------------------------
sessionInfo()