Compiled date: 2023-10-24
Last edited: 2022-08-01
License: GPL-3
Run the following code to install the Bioconductor version of the package.
# install.packages("BiocManager")
BiocManager::install("POMA")
library(POMA)
library(patchwork)
Let’s create a cleaned SummarizedExperiment
object from the sample st000336
data to explore the normalization effects.
# Impute missing values; no `method` is passed, so PomaImpute uses its default (KNN)
example_data <- st000336 %>% PomaImpute()
method argument is empty! KNN will be used
example_data
class: SummarizedExperiment
dim: 30 57
metadata(0):
assays(1): ''
rownames(30): x1_methylhistidine x3_methylhistidine ... pyruvate
succinate
rowData names(0):
colnames(57): DMD004.1.U02 DMD005.1.U02 ... DMD167.5.U02 DMD173.1.U02
colData names(2): group steroids
Here we will evaluate ALL normalization methods that POMA offers on the same SummarizedExperiment
object to compare them (Berg et al. 2006).
# Normalize the same imputed data once per PomaNorm method so the results can
# be compared with each other; "none" serves as the not-normalized reference
none               <- example_data %>% PomaNorm(method = "none")
auto_scaling       <- example_data %>% PomaNorm(method = "auto_scaling")
level_scaling      <- example_data %>% PomaNorm(method = "level_scaling")
log_scaling        <- example_data %>% PomaNorm(method = "log_scaling")
log_transformation <- example_data %>% PomaNorm(method = "log_transformation")
vast_scaling       <- example_data %>% PomaNorm(method = "vast_scaling")
log_pareto         <- example_data %>% PomaNorm(method = "log_pareto")
When we check the dimensions of the data after normalization, we can see that all methods have the same effect on the data dimensions. PomaNorm
only changes the data dimensions when the data contain features that consist only of zeros or features with zero variance. In these two cases only, PomaNorm
removes features from the data, changing the data dimensions.
# Extract the assay matrix of each normalized object and report its dimensions
none %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
auto_scaling %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
level_scaling %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
log_scaling %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
log_transformation %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
vast_scaling %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
log_pareto %>% SummarizedExperiment::assay() %>% dim()
> [1] 30 57
Here we can evaluate the different normalization effects on samples (Berg et al. 2006).
# Build one per-sample boxplot panel for the comparison grid.
#   data:  a normalized object returned by PomaNorm
#   title: plot title naming the normalization method
# Grid panels are drawn without jitter and without a legend, and their x-axis
# tick labels are blanked.
sample_boxplot_panel <- function(data, title) {
  PomaBoxplots(data,
               group = "samples",
               jitter = FALSE,
               legend_position = "none") +
    ggplot2::ggtitle(title) +
    ggplot2::theme(axis.text.x = ggplot2::element_blank())
}

# Reference plot of the not-normalized data: shown on its own, so it keeps the
# default legend position and its x-axis tick labels.
box_none <- PomaBoxplots(none,
                         group = "samples",
                         jitter = FALSE) +
  ggplot2::ggtitle("Not Normalized")

# One panel per normalization method. Descriptive names are used instead of
# single letters so that base functions such as `c` are not masked.
box_auto_scaling       <- sample_boxplot_panel(auto_scaling, "Auto Scaling")
box_level_scaling      <- sample_boxplot_panel(level_scaling, "Level Scaling")
box_log_scaling        <- sample_boxplot_panel(log_scaling, "Log Scaling")
box_log_transformation <- sample_boxplot_panel(log_transformation,
                                               "Log Transformation")
box_vast_scaling       <- sample_boxplot_panel(vast_scaling, "Vast Scaling")
box_log_pareto         <- sample_boxplot_panel(log_pareto, "Log Pareto")

box_none
# patchwork composition: `+` joins panels into a row, `/` stacks the two rows
(box_auto_scaling + box_level_scaling + box_log_scaling) /
  (box_log_transformation + box_vast_scaling + box_log_pareto)
Here we can evaluate the different normalization effects on features.
# Build one per-feature density panel for the comparison grid.
#   data:  a normalized object returned by PomaNorm
#   title: plot title naming the normalization method
# Grid panels are drawn without a legend and with both axis titles blanked.
feature_density_panel <- function(data, title) {
  PomaDensity(data,
              group = "features",
              legend_position = "none") +
    ggplot2::ggtitle(title) +
    ggplot2::theme(axis.title.x = ggplot2::element_blank(),
                   axis.title.y = ggplot2::element_blank())
}

# Reference plot of the not-normalized data: shown on its own, so it keeps its
# axis titles (the legend is still hidden).
density_none <- PomaDensity(none,
                            group = "features",
                            legend_position = "none") +
  ggplot2::ggtitle("Not Normalized")

# One panel per normalization method, named after the method it shows
density_auto_scaling       <- feature_density_panel(auto_scaling,
                                                    "Auto Scaling")
density_level_scaling      <- feature_density_panel(level_scaling,
                                                    "Level Scaling")
density_log_scaling        <- feature_density_panel(log_scaling,
                                                    "Log Scaling")
density_log_transformation <- feature_density_panel(log_transformation,
                                                    "Log Transformation")
density_vast_scaling       <- feature_density_panel(vast_scaling,
                                                    "Vast Scaling")
density_log_pareto         <- feature_density_panel(log_pareto,
                                                    "Log Pareto")

density_none
# patchwork composition: `+` joins panels into a row, `/` stacks the two rows
(density_auto_scaling + density_level_scaling + density_log_scaling) /
  (density_log_transformation + density_vast_scaling + density_log_pareto)
# Print the R version, platform, locale and package versions of the session
# in which this document was compiled
sessionInfo()
> R version 4.3.1 (2023-06-16)
> Platform: x86_64-pc-linux-gnu (64-bit)
> Running under: Ubuntu 22.04.3 LTS
>
> Matrix products: default
> BLAS: /home/biocbuild/bbs-3.18-bioc/R/lib/libRblas.so
> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
>
> locale:
> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
> [3] LC_TIME=en_GB LC_COLLATE=C
> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
> [9] LC_ADDRESS=C LC_TELEPHONE=C
> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
>
> time zone: America/New_York
> tzcode source: system (glibc)
>
> attached base packages:
> [1] stats4 stats graphics grDevices utils datasets methods
> [8] base
>
> other attached packages:
> [1] patchwork_1.1.3 SummarizedExperiment_1.32.0
> [3] Biobase_2.62.0 GenomicRanges_1.54.0
> [5] GenomeInfoDb_1.38.0 IRanges_2.36.0
> [7] S4Vectors_0.40.0 BiocGenerics_0.48.0
> [9] MatrixGenerics_1.14.0 matrixStats_1.0.0
> [11] plotly_4.10.3 ggraph_2.1.0
> [13] ggplot2_3.4.4 POMA_1.12.0
> [15] BiocStyle_2.30.0
>
> loaded via a namespace (and not attached):
> [1] splines_4.3.1 bitops_1.0-7 tibble_3.2.1
> [4] polyclip_1.10-6 hardhat_1.3.0 pROC_1.18.4
> [7] rpart_4.1.21 lifecycle_1.0.3 doParallel_1.0.17
> [10] globals_0.16.2 lattice_0.22-5 MASS_7.3-60
> [13] backports_1.4.1 magrittr_2.0.3 limma_3.58.0
> [16] sass_0.4.7 rmarkdown_2.25 jquerylib_0.1.4
> [19] yaml_2.3.7 RColorBrewer_1.1-3 lubridate_1.9.3
> [22] abind_1.4-5 zlibbioc_1.48.0 purrr_1.0.2
> [25] RCurl_1.98-1.12 nnet_7.3-19 tweenr_2.0.2
> [28] ipred_0.9-14 circlize_0.4.15 lava_1.7.2.1
> [31] GenomeInfoDbData_1.2.11 ggrepel_0.9.4 listenv_0.9.0
> [34] ellipse_0.5.0 vegan_2.6-4 RSpectra_0.16-1
> [37] parallelly_1.36.0 permute_0.9-7 codetools_0.2-19
> [40] DelayedArray_0.28.0 ggforce_0.4.1 tidyselect_1.2.0
> [43] shape_1.4.6 farver_2.1.1 viridis_0.6.4
> [46] jsonlite_1.8.7 caret_6.0-94 GetoptLong_1.0.5
> [49] e1071_1.7-13 tidygraph_1.2.3 survival_3.5-7
> [52] iterators_1.0.14 foreach_1.5.2 dbscan_1.1-11
> [55] tools_4.3.1 Rcpp_1.0.11 glue_1.6.2
> [58] rARPACK_0.11-0 prodlim_2023.08.28 gridExtra_2.3
> [61] SparseArray_1.2.0 xfun_0.40 mixOmics_6.26.0
> [64] mgcv_1.9-0 dplyr_1.1.3 withr_2.5.1
> [67] BiocManager_1.30.22 fastmap_1.1.1 fansi_1.0.5
> [70] digest_0.6.33 timechange_0.2.0 R6_2.5.1
> [73] colorspace_2.1-0 Cairo_1.6-1 utf8_1.2.4
> [76] tidyr_1.3.0 generics_0.1.3 data.table_1.14.8
> [79] recipes_1.0.8 corpcor_1.6.10 FNN_1.1.3.2
> [82] class_7.3-22 graphlayouts_1.0.1 httr_1.4.7
> [85] htmlwidgets_1.6.2 S4Arrays_1.2.0 uwot_0.1.16
> [88] ModelMetrics_1.2.2.2 pkgconfig_2.0.3 gtable_0.3.4
> [91] timeDate_4022.108 ComplexHeatmap_2.18.0 impute_1.76.0
> [94] XVector_0.42.0 htmltools_0.5.6.1 bookdown_0.36
> [97] clue_0.3-65 scales_1.2.1 png_0.1-8
> [100] gower_1.0.1 knitr_1.44 reshape2_1.4.4
> [103] rjson_0.2.21 nlme_3.1-163 proxy_0.4-27
> [106] cachem_1.0.8 GlobalOptions_0.1.2 stringr_1.5.0
> [109] parallel_4.3.1 pillar_1.9.0 grid_4.3.1
> [112] vctrs_0.6.4 randomForest_4.7-1.1 cluster_2.1.4
> [115] evaluate_0.22 magick_2.8.1 cli_3.6.1
> [118] compiler_4.3.1 rlang_1.1.1 crayon_1.5.2
> [121] future.apply_1.11.0 labeling_0.4.3 plyr_1.8.9
> [124] stringi_1.7.12 viridisLite_0.4.2 BiocParallel_1.36.0
> [127] munsell_0.5.0 lazyeval_0.2.2 glmnet_4.1-8
> [130] Matrix_1.6-1.1 glasso_1.11 future_1.33.0
> [133] statmod_1.5.0 igraph_1.5.1 broom_1.0.5
> [136] bslib_0.5.1
Berg, Robert A van den, Huub CJ Hoefsloot, Johan A Westerhuis, Age K Smilde, and Mariët J van der Werf. 2006. “Centering, Scaling, and Transformations: Improving the Biological Information Content of Metabolomics Data.” BMC Genomics 7 (1): 142.