0.1 Preamble
1 Introduction
- 1.0.1 Setup
- 1.1 Retrieving the data-set
  - 1.1.1 Create the COTAN object
  - 1.1.2 Drop low quality cells
2 Establish genes’ clusters

0.1 Preamble

library(COTAN)
library(zeallot)

# necessary to solve precedence of overloads
conflicted::conflict_prefer("%<-%", "zeallot")

# enable multi-processing (not on Windows)
prevOptState <- options(parallelly.fork.enable = TRUE)

1 Introduction

This tutorial shows the available tools in COTAN to help with genes’ clustering

1.0.1 Setup

Define a directory where the data and the output will be stored

dataDir <- file.path(tempdir(), "COTAN_vignette_data")
dir.create(dataDir, recursive = TRUE, showWarnings = FALSE)

print(dataDir)
#> [1] "/tmp/RtmpDqd2Tg/COTAN_vignette_data"

outDir <- dataDir

# Log-level 2 was chosen to showcase better how the package works
# In normal usage a level of 0 or 1 is more appropriate
setLoggingLevel(2L)
#> Setting new log level to 2

# This file will contain all the logs produced by the package
# as if at the highest logging level
setLoggingFile(file.path(outDir, "vignette_genes_clustering.log"))
#> Setting log file to be: /tmp/RtmpDqd2Tg/COTAN_vignette_data/vignette_genes_clustering.log

1.1 Retrieving the data-set

Download the data-set for mouse cortex E17.5 from Yuzwa et al. (2017)

GEO <- "GSM2861514"

dir.create(file.path(dataDir, GEO), showWarnings = FALSE)

datasetFileName <-
  file.path(dataDir, GEO, "GSM2861514_E175_All_Cells_DGE.txt.gz")


# retries up to 5 times to get the dataset
attempts <- 0L
maxAttempts <- 5L
ok <- FALSE

while (attempts < maxAttempts && !ok) {
  attempts <- attempts + 1L

  if (!file.exists(datasetFileName)) {
    res <- try(
      GEOquery::getGEOSuppFiles(
        GEO = GEO,
        makeDirectory = TRUE,
        baseDir = dataDir,
        filter_regex = base::basename(datasetFileName),
        fetch_files = TRUE
      ),
      silent = TRUE
    )
  }

  ok <- file.exists(datasetFileName)

  if (!ok && attempts < maxAttempts) {
    Sys.sleep(1)
  }
}

assertthat::assert_that(
  ok,
  msg = paste0(
    "Failed to retrieve file '", datasetFileName,
    "' after ", maxAttempts, " attempts."
  )
)
#> [1] TRUE

rawDataset <- read.csv(datasetFileName, sep = "\t", row.names = 1L)

print(dim(rawDataset))
#> [1] 17085  2000

1.1.1 Create the COTAN object

Initialize the COTAN object with the row count table and the metadata from the experiment.

cond <- "mouse_cortex_E17.5"

obj <- COTAN(raw = rawDataset)
obj <-
  initializeMetaDataset(
    obj,
    GEO = GEO,
    sequencingMethod = "Drop_seq",
    sampleCondition = cond
  )
#> Initializing `COTAN` meta-data

logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])),
        logLevel = 1L)
#> Condition mouse_cortex_E17.5

Assign to each cell its origin

# mark cells origin
data(vignette.cells.origin)
head(vignette.cells.origin, 18)
#> GCGAATTGTGAA ACCGATAACTGA CCCCGGGTGCGA CTGTAGATGTTA TCATCGAAGCGC TTCTACCGAGTC 
#>        Other        Other        Other        Other        Other        Other 
#> CTGTTCCCGGCG GCGTGTTAGTTC CTCGCGCGTTTA GATGTATAACTT GCGCTATGATTT CGTTTAGTTTAC 
#>        Other        Other        Other        Other        Other     Cortical 
#> GTGGAGGCCCAT TGTCACTACATC TCTAGAACAACG ACCTTTGTTCGT TTGTCTTCTTCG TAAAATATCGCC 
#>        Other        Other     Cortical     Cortical     Cortical     Cortical 
#> Levels: Cortical Other

obj <- 
  addCondition(obj, condName = "origin", conditions = vignette.cells.origin)

rm(vignette.cells.origin)

1.1.2 Drop low quality cells

# use previously established results to determine
# which cells were dropped in the cleaning stage
data(vignette.split.clusters)

cellsToDrop <-
  getCells(obj)[!(getCells(obj) %in% names(vignette.split.clusters))]

obj <- dropGenesCells(obj, cells = cellsToDrop)

# Log the remaining number of cells

logThis(paste("n cells", getNumCells(obj)), logLevel = 1L)
#> n cells 1783

table(getCondition(obj, condName = "origin"))
#> 
#> Cortical    Other 
#>      844      939

rm(vignette.split.clusters)

2 Establish genes’ clusters

2.1 COTAN analysis

In this part, the COTAN model is calibrated: all the contingency tables are computed and used to get the statistics necessary to COEX evaluation and storing

obj <-
  proceedToCoex(
    obj,
    calcCoex = TRUE,
    optimizeForSpeed = TRUE,
    cores = 3L,
    deviceStr = "cuda"
  )
#> COTAN dataset analysis: START
#> Genes/cells selection done: dropped [4330] genes and [0] cells
#> Working on [12755] genes and [1783] cells
#> Estimate `dispersion`: START
#> Total calculations elapsed time: 9.44159293174744
#> Estimate `dispersion`: DONE
#> Estimate `dispersion`: DONE
#> `dispersion` | min: -0.0349260648055935 | max: 760.574661197634 | % negative: 12.5362602900823
#> COTAN genes' COEX estimation: START
#> While trying to load the `torch` library Error in doTryCatch(return(expr), name, parentenv, handler): The `torch` library is installed but the required additional libraries are not available yet
#> Warning in value[[3L]](cond): The `torch` library is installed, but might
#> require further initialization
#> Warning in value[[3L]](cond): Please look at the `torch` package installation
#> guide to complete the installation
#> Warning in canUseTorch(optimizeForSpeed, deviceStr): Falling back to legacy
#> [non-torch] code.
#> Calculate genes' COEX (legacy): START
#> Warning in asMethod(object): sparse->dense coercion: allocating vector of size
#> 1.2 GiB
#> Total calculations elapsed time: 314.84135055542
#> Calculate genes' COEX (legacy): DONE
#> COTAN dataset analysis: DONE

2.1.1 Get data tables and stored `COEX`

Sometimes we can also be interested in the numbers present directly in the contingency tables for two specific genes.

cat("Contingency Tables:")
#> Contingency Tables:
print(contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b"))
#> $observed
#>            Satb2.yes Satb2.no
#> Bcl11b.yes        62      565
#> Bcl11b.no        308      848
#> 
#> $expected
#>            Satb2.yes Satb2.no
#> Bcl11b.yes   138.583  488.417
#> Bcl11b.no    231.417  924.583

cat(paste("Corresponding COEX value:",
          getGenesCoex(obj, zeroDiagonal = FALSE)["Satb2", "Bcl11b"]))
#> Corresponding COEX value: -0.219641253268044

Another useful function is getGenesCoex(). This can be used to extract the whole or a partial COEX matrix from a COTAN object.

# Extract the whole matrix
coex <- getGenesCoex(obj, zeroDiagonal = FALSE)
coex[1000L:1005L, 1000L:1005L]
#> 6 x 6 Matrix of class "dspMatrix"
#>              Ankrd6      Ankrd61        Anks1       Anks1b        Anks3
#> Ankrd6  0.919828628  0.006022968  0.039727285  0.019217926  0.022676686
#> Ankrd61 0.006022968  0.304643397  0.012120271  0.012826424  0.002818821
#> Anks1   0.039727285  0.012120271  0.907300176 -0.011094145 -0.006198863
#> Anks1b  0.019217926  0.012826424 -0.011094145  0.919554726 -0.017552746
#> Anks3   0.022676686  0.002818821 -0.006198863 -0.017552746  0.905659149
#> Anks6   0.014713461 -0.004394969 -0.021761229 -0.007003361  0.029217368
#>                Anks6
#> Ankrd6   0.014713461
#> Ankrd61 -0.004394969
#> Anks1   -0.021761229
#> Anks1b  -0.007003361
#> Anks3    0.029217368
#> Anks6    0.513438755

# For a partial matrix
coex_sm <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2"),
                        zeroDiagonal = FALSE)
coex_sm[2001:2010, ]
#> 10 x 3 Matrix of class "dgeMatrix"
#>              Bcl11b        Fezf2         Satb2
#> Ccdc91 -0.007340495 -0.015017365  0.0081678484
#> Ccdc92  0.006863524 -0.002183516  0.0121977478
#> Ccdc93 -0.009075446  0.032998076 -0.0185992406
#> Ccdc94 -0.025712653 -0.013185117  0.0153650072
#> Ccdc96 -0.028979603 -0.015681037  0.0484463876
#> Ccdc97 -0.029885047 -0.013649152 -0.0390277387
#> Cchcr1 -0.015829490  0.019683483  0.0005360145
#> Cck    -0.040912312  0.049164448  0.0376753146
#> Ccl12  -0.030998730 -0.011044525 -0.0124730051
#> Ccl25  -0.037731486 -0.006071913  0.0028277194

2.2 Analysis of the elaborated data: `GDI`

To calculate and store the Global Differentiation Index (GDI) use the following:

gdiDF <- calculateGDI(obj, cores = 3L)
#> Calculate GDI dataframe: START
#> Calculate `GDI`: START
#> Total calculations elapsed time: 7.89786243438721
#> Calculate `GDI`: DONE
#> Total calculations elapsed time: 18.1735029220581
#> Calculate GDI dataframe: DONE

# Show the most *differentially expressed* genes
perm <- order(gdiDF[["GDI"]], decreasing = TRUE)
head(gdiDF[perm, ], n = 10)
#>               sum.raw.norm      GDI exp.cells
#> Mki67             2.400195 2.810177  6.393718
#> Hmgb2             2.669711 2.789769 10.095345
#> 2810417H13Rik     2.373125 2.773485  6.505889
#> Pbk               2.189592 2.716527  5.496354
#> Prc1              2.357143 2.699208  5.384184
#> Dbi               3.145560 2.687651 23.331464
#> Ccnd2             3.220094 2.687415 21.817162
#> Vim               2.711821 2.670697 14.638250
#> Mfge8             2.370640 2.664075  5.384184
#> Cdca8             2.260013 2.658388  6.169377

# This will store only the $GDI column
obj <- storeGDI(obj, genesGDI = gdiDF)

The other two columns return the sum of the normalized data rows and the percentage of cells expressing the gene respectively.

2.2.1 Define genes lists

genesLists <- list(
  # Neural Progenitor Genes
  "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"),
  # Pan Neural Genes
  "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"),
  # House Keeping
  "hk"   = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a",
             "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1",
             "Tars", "Amacr")
)

2.2.2 `GDI` Plot

The next function can either plot the GDI from the dataset directly or use the already stored data.

It marks the given threshold 1.40 (in red) and the 50% and 75% quantiles (in blue). We can also specify some gene sets (three in this case) that we want to label explicitly in the GDI plot.

GDIPlot(obj, genes = genesLists, GDIThreshold = 1.40)
#> GDI plot
#> Removed 0 low GDI genes (such as the fully-expressed) in GDI plot

In the picture above it is possible to see how, as expected, the house-keeping genes (hk) have generally a low GDI value.

2.2.3 Store the calibrated object for future analysis

# saving the structure
saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS")))

2.3 Heatmaps

To perform the Gene Pair Analysis, we can plot a heatmap of the COEX values between two gene sets.

We have to define the different gene sets (list.genes) in a list. The first set will be used to define the columns; then we can choose which sets to use in the function parameter sets (for example, from 1 to 3).

plot(heatmapPlot(obj, genesLists = genesLists, sets = 1L:3L, cores = 3L))
#> Heatmap plot: START
#> Hangling COTAN object with condition: mouse_cortex_E17.5
#> calculating PValues: START
#> Get p-values on a set of genes on columns and on a set of genes on rows
#> Total calculations elapsed time: 4.66432595252991
#> calculating PValues: DONE
#> min COEX: -0.335360548131137 max COEX: 0.329177031378561
#> Total calculations elapsed time: 4.75778269767761
#> Heatmap plot: DONE

layersGenes <- list(
  "L1"   = c("Reln",   "Lhx5"),
  "L2/3" = c("Cux1",   "Satb2"),
  "L4"   = c("Rorb",   "Sox5"),
  "L5/6" = c("Bcl11b", "Fezf2"),
  "Prog" = c("Hes1",    "Vim", "Dummy")
)

We can also plot a general heatmap of COEX values based on some markers like the following one.

layersMarkers <- c("Satb2", "Bcl11b", "Vim", "Hes1")

genesHeatmapPlot(
  obj,
  primaryMarkers = layersMarkers,
  pValueThreshold = 0.0001,
  symmetric = TRUE,
  cores = 3L
)
#> calculating PValues: START
#> Get p-values genome wide on columns and genome wide on rows
#> Total calculations elapsed time: 33.9740753173828
#> calculating PValues: DONE

genesHeatmapPlot(
  obj,
  primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"),
  secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"),
  pValueThreshold = 0.001,
  symmetric = FALSE,
  cores = 3L
)

2.4 Establishing genes’ clusters

COTAN provides a way to establish genes’ clusters given some lists of markers

c(gSpace, eigPlot, pcaGenesClDF, treePlot) %<-%
  establishGenesClusters(
    obj,
    groupMarkers = layersGenes,
    numGenesPerMarker = 25L,
    kCuts = 5L
  )
#> Establishing gene clusters - START
#> Calculating gene co-expression space - START
#> calculating PValues: START
#> Get p-values on a set of genes on columns and genome wide on rows
#> Total calculations elapsed time: 5.65402913093567
#> calculating PValues: DONE
#> Number of selected secondary markers: 190
#> Calculate `GDI`: START
#> Total calculations elapsed time: 0.243463516235352
#> Calculate `GDI`: DONE
#> Total calculations elapsed time: 12.6099414825439
#> Calculating gene co-expression space - DONE
#> Total calculations elapsed time: 13.1466805934906
#> Establishing gene clusters - DONE

plot(eigPlot)

plot(treePlot)

2.4.1 `UMAP` plot

colSelection <- vapply(pcaGenesClDF, is.numeric, logical(1L))

genesUmapPl <-
  UMAPPlot(
    pcaGenesClDF[, colSelection, drop = FALSE],
    clusters = getColumnFromDF(pcaGenesClDF, "hclust"),
    elements = layersGenes,
    title = "Genes' clusters UMAP Plot",
    numNeighbors = 32L,
    minPointsDist = 0.25
  )
#> UMAP plot: START
#> 18:10:23 UMAP embedding parameters a = 1.121 b = 1.057
#> 18:10:23 Read 1276 rows and found 12 numeric columns
#> 18:10:23 Using Annoy for neighbor search, n_neighbors = 32
#> 18:10:23 Building Annoy index with metric = cosine, n_trees = 50
#> 0%   10   20   30   40   50   60   70   80   90   100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 18:10:23 Writing NN index file to temp file /tmp/RtmpDqd2Tg/file19c70860baf5c5
#> 18:10:23 Searching Annoy index using 1 thread, search_k = 3200
#> 18:10:24 Annoy recall = 100%
#> 18:10:25 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 32
#> 18:10:27 Initializing from normalized Laplacian + noise (using RSpectra)
#> 18:10:27 Commencing optimization for 500 epochs, with 55474 positive edges
#> 18:10:27 Using rng type: pcg
#> 18:10:29 Optimization finished
#> Total calculations elapsed time: 6.30713510513306
#> UMAP plot: DONE

plot(genesUmapPl)

2.5 Vignette clean-up stage

The next few lines are just to clean.

if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) {
  # delete file if it exists
  file.remove(file.path(outDir, paste0(cond, ".cotan.RDS")))
}
if (file.exists(file.path(outDir, paste0(cond, "_times.csv")))) {
  # delete file if it exists
  file.remove(file.path(outDir, paste0(cond, "_times.csv")))
}
if (dir.exists(file.path(outDir, cond))) {
  unlink(file.path(outDir, cond), recursive = TRUE)
}
# if (dir.exists(file.path(outDir, GEO))) {
#   unlink(file.path(outDir, GEO), recursive = TRUE)
# }

# stop logging to file
setLoggingFile("")
#> Closing previous log file - Setting log file to be:
file.remove(file.path(outDir, "vignette_genes_clustering.log"))
#> [1] TRUE

options(prevOptState)

Sys.time()
#> [1] "2026-04-28 18:10:30 EDT"

sessionInfo()
#> R version 4.6.0 RC (2026-04-17 r89917)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.24-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] zeallot_0.2.0    COTAN_2.13.0     BiocStyle_2.41.0
#> 
#> loaded via a namespace (and not attached):
#>   [1] matrixStats_1.5.0           spatstat.sparse_3.1-0      
#>   [3] httr_1.4.8                  RColorBrewer_1.1-3         
#>   [5] doParallel_1.0.17           tools_4.6.0                
#>   [7] sctransform_0.4.3           R6_2.6.1                   
#>   [9] lazyeval_0.2.3              uwot_0.2.4                 
#>  [11] ggdist_3.3.3                GetoptLong_1.1.1           
#>  [13] withr_3.0.2                 sp_2.2-1                   
#>  [15] gridExtra_2.3               parallelDist_0.2.7         
#>  [17] progressr_0.19.0            cli_3.6.6                  
#>  [19] Biobase_2.73.0              textshaping_1.0.5          
#>  [21] Cairo_1.7-0                 spatstat.explore_3.8-0     
#>  [23] fastDummies_1.7.6           labeling_0.4.3             
#>  [25] sass_0.4.10                 Seurat_5.5.0               
#>  [27] S7_0.2.2                    spatstat.data_3.1-9        
#>  [29] readr_2.2.0                 proxy_0.4-29               
#>  [31] ggridges_0.5.7              pbapply_1.7-4              
#>  [33] systemfonts_1.3.2           rentrez_1.2.4              
#>  [35] dichromat_2.0-0.1           parallelly_1.47.0          
#>  [37] limma_3.69.0                torch_0.17.0               
#>  [39] generics_0.1.4              shape_1.4.6.1              
#>  [41] ica_1.0-3                   spatstat.random_3.4-5      
#>  [43] dplyr_1.2.1                 distributional_0.7.0       
#>  [45] dendextend_1.19.1           Matrix_1.7-5               
#>  [47] S4Vectors_0.51.0            abind_1.4-8                
#>  [49] lifecycle_1.0.5             yaml_2.3.12                
#>  [51] SummarizedExperiment_1.43.0 SparseArray_1.13.0         
#>  [53] Rtsne_0.17                  grid_4.6.0                 
#>  [55] promises_1.5.0              crayon_1.5.3               
#>  [57] miniUI_0.1.2                lattice_0.22-9             
#>  [59] beachmat_2.29.0             cowplot_1.2.0              
#>  [61] magick_2.9.1                pillar_1.11.1              
#>  [63] knitr_1.51                  ComplexHeatmap_2.29.0      
#>  [65] GenomicRanges_1.65.0        rjson_0.2.23               
#>  [67] future.apply_1.20.2         codetools_0.2-20           
#>  [69] glue_1.8.1                  spatstat.univar_3.1-7      
#>  [71] data.table_1.18.2.1         vctrs_0.7.3                
#>  [73] png_0.1-9                   spam_2.11-3                
#>  [75] gtable_0.3.6                assertthat_0.2.1           
#>  [77] cachem_1.1.0                zigg_0.0.2                 
#>  [79] xfun_0.57                   S4Arrays_1.13.0            
#>  [81] mime_0.13                   Rfast_2.1.5.2              
#>  [83] Seqinfo_1.3.0               survival_3.8-6             
#>  [85] SingleCellExperiment_1.35.0 iterators_1.0.14           
#>  [87] tinytex_0.59                statmod_1.5.1              
#>  [89] fitdistrplus_1.2-6          ROCR_1.0-12                
#>  [91] nlme_3.1-169                bit64_4.8.0                
#>  [93] RcppAnnoy_0.0.23            bslib_0.10.0               
#>  [95] irlba_2.3.7                 KernSmooth_2.23-26         
#>  [97] otel_0.2.0                  colorspace_2.1-2           
#>  [99] BiocGenerics_0.59.0         tidyselect_1.2.1           
#> [101] processx_3.9.0              bit_4.6.0                  
#> [103] compiler_4.6.0              curl_7.1.0                 
#> [105] httr2_1.2.2                 xml2_1.5.2                 
#> [107] DelayedArray_0.39.0         plotly_4.12.0              
#> [109] bookdown_0.46               scales_1.4.0               
#> [111] lmtest_0.9-40               callr_3.7.6                
#> [113] rappdirs_0.3.4              stringr_1.6.0              
#> [115] digest_0.6.39               goftest_1.2-3              
#> [117] spatstat.utils_3.2-2        rmarkdown_2.31             
#> [119] GEOquery_2.81.0             XVector_0.53.0             
#> [121] coro_1.1.0                  htmltools_0.5.9            
#> [123] pkgconfig_2.0.3             MatrixGenerics_1.25.0      
#> [125] fastmap_1.2.0               rlang_1.2.0                
#> [127] GlobalOptions_0.1.4         htmlwidgets_1.6.4          
#> [129] ggthemes_5.2.0              shiny_1.13.0               
#> [131] farver_2.1.2                jquerylib_0.1.4            
#> [133] zoo_1.8-15                  jsonlite_2.0.0             
#> [135] BiocParallel_1.47.0         BiocSingular_1.29.0        
#> [137] magrittr_2.0.5              dotCall64_1.2              
#> [139] patchwork_1.3.2             Rcpp_1.1.1-1.1             
#> [141] viridis_0.6.5               reticulate_1.46.0          
#> [143] stringi_1.8.7               MASS_7.3-65                
#> [145] plyr_1.8.9                  parallel_4.6.0             
#> [147] listenv_0.10.1              ggrepel_0.9.8              
#> [149] deldir_2.0-4                splines_4.6.0              
#> [151] tensor_1.5.1                hms_1.1.4                  
#> [153] circlize_0.4.18             ps_1.9.3                   
#> [155] igraph_2.3.0                spatstat.geom_3.7-3        
#> [157] RcppHNSW_0.6.0              reshape2_1.4.5             
#> [159] stats4_4.6.0                ScaledMatrix_1.21.0        
#> [161] XML_3.99-0.23               evaluate_1.0.5             
#> [163] SeuratObject_5.4.0          RcppParallel_5.1.11-2      
#> [165] BiocManager_1.30.27         tzdb_0.5.0                 
#> [167] foreach_1.5.2               httpuv_1.6.17              
#> [169] RANN_2.6.2                  tidyr_1.3.2                
#> [171] purrr_1.2.2                 polyclip_1.10-7            
#> [173] future_1.70.0               clue_0.3-68                
#> [175] scattermore_1.2             ggplot2_4.0.3              
#> [177] conflicted_1.2.0            rsvd_1.0.5                 
#> [179] xtable_1.8-8                RSpectra_0.16-2            
#> [181] later_1.4.8                 viridisLite_0.4.3          
#> [183] ragg_1.5.2                  tibble_3.3.1               
#> [185] memoise_2.0.1               IRanges_2.47.0             
#> [187] cluster_2.1.8.2             globals_0.19.1

Guided tutorial to genes’ clustering using COTAN

28 April 2026

Package

Contents

0.1 Preamble

1 Introduction

1.0.1 Setup

1.1 Retrieving the data-set

1.1.1 Create the COTAN object

1.1.2 Drop low quality cells

2 Establish genes’ clusters

2.1 COTAN analysis

2.1.1 Get data tables and stored `COEX`

2.2 Analysis of the elaborated data: `GDI`

2.2.1 Define genes lists

2.2.2 `GDI` Plot

2.2.3 Store the calibrated object for future analysis

2.3 Heatmaps

2.4 Establishing genes’ clusters

2.4.1 `UMAP` plot

2.5 Vignette clean-up stage

Guided tutorial to genes’ clustering using COTAN

28 April 2026

Package

Contents

0.1 Preamble

1 Introduction

1.0.1 Setup

1.1 Retrieving the data-set

1.1.1 Create the COTAN object

1.1.2 Drop low quality cells

2 Establish genes’ clusters

2.1 COTAN analysis

2.1.1 Get data tables and stored COEX

2.2 Analysis of the elaborated data: GDI

2.2.1 Define genes lists

2.2.2 GDI Plot

2.2.3 Store the calibrated object for future analysis

2.3 Heatmaps

2.4 Establishing genes’ clusters

2.4.1 UMAP plot

2.5 Vignette clean-up stage

2.1.1 Get data tables and stored `COEX`

2.2 Analysis of the elaborated data: `GDI`

2.2.2 `GDI` Plot

2.4.1 `UMAP` plot