## ----include = FALSE----------------------------------------------------------
# Set the default knitr chunk options used by the chunks in this document.
knitr::opts_chunk$set(
    # merge each chunk's source and its text output into one block
    collapse = TRUE,
    # prefix printed at the start of every output line
    comment = "#>",
    # horizontally centre figures
    fig.align = "center"
)

# Render a table as a compact DT widget with a 10pt font.
#
# x: tabular object handed to DT::datatable(); colnames(x) selects the
#    columns that receive the body font size.
# Returns the styled widget produced by DT::formatStyle().
.DT <- function(x) {
    # initComplete callback: set the header font size to 10pt
    header_font_js <- DT::JS(
        "function(settings, json) {",
        "$(this.api().table().header()).css({'font-size':'10pt'});",
        "}"
    )

    # build the widget: horizontal scrolling, 6 rows per page, dom = "t"
    widget <- DT::datatable(
        x,
        options = list(
            scrollX = TRUE,
            pageLength = 6,
            dom = "t",
            initComplete = header_font_js
        ),
        rownames = FALSE
    )

    # apply the same 10pt font size to every column of the table body
    DT::formatStyle(widget, columns = colnames(x), fontSize = "10pt")
}

library(BiocStyle)

## ----eval = FALSE, include = TRUE---------------------------------------------
#  # install BiocManager if not present
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#      install.packages("BiocManager")
#  }
#  
#  # install MetMashR and dependencies
#  BiocManager::install("MetMashR")

## ----eval=TRUE, include=FALSE-------------------------------------------------
suppressPackageStartupMessages({
    # load the packages
    library(MetMashR)
    library(ggplot2)
    library(structToolbox)
    library(dplyr)
    library(DT)
})

## ----eval=FALSE, include=TRUE-------------------------------------------------
#  # load the packages
#  library(struct)
#  library(MetMashR)
#  library(metabolomicsWorkbenchR)
#  library(ggplot2)

## -----------------------------------------------------------------------------
# prepare source object
# system.file() resolves the example file shipped inside the MetMashR package
# (the path is a single literal, so no paste0() is needed)
AT <- ls_source(
    source = system.file(
        "extdata/MTox/LS/MTox_2023_HILIC_POS.txt",
        package = "MetMashR"
    )
)

# read source
AT <- read_source(AT)

# show info
AT

## -----------------------------------------------------------------------------
# create the MTox700plus database source and read it in one expression
MT <- read_source(MTox700plus_database())

# display the imported object
MT

## -----------------------------------------------------------------------------
# prepare source object
# (MT is re-created here, replacing any previously assigned MT)
MT <- MTox700plus_database()

# read to data.frame
# read_database() is called instead of read_source(); the result is kept in
# `df` so it can be passed to the table helper below
df <- read_database(MT)

# show
# render the result with the .DT() table helper
.DT(df)

## -----------------------------------------------------------------------------
# prepare source object
# system.file() resolves the example file shipped inside the MetMashR package
# (the path is a single literal, so no paste0() is needed)
AT <- ls_source(
    source = system.file(
        "extdata/MTox/LS/MTox_2023_HILIC_POS.txt",
        package = "MetMashR"
    )
)

# prepare workflow
# a single-step workflow consisting only of the import step
WF <- import_source()

# apply workflow to annotation source
WF <- model_apply(WF, AT)

# show
# predicted() retrieves the output of the applied workflow
predicted(WF)

## -----------------------------------------------------------------------------
# prepare source object
# system.file() resolves the example file shipped inside the MetMashR package
# (the path is a single literal, so no paste0() is needed)
AT <- ls_source(
    source = system.file(
        "extdata/MTox/LS/MTox_2023_HILIC_POS.txt",
        package = "MetMashR"
    )
)

# prepare workflow
# steps are chained with `+` in the order they are listed
WF <-
    # step 1 import source from file
    import_source() +
    # step 2 filter the "Grade" column to only include "A" and "B"
    filter_labels(
        column_name = "Grade",
        labels = c("A", "B"),
        mode = "include"
    )

# apply workflow to annotation source
WF <- model_apply(WF, AT)

# show
# predicted() retrieves the output of the applied workflow
predicted(WF)

## -----------------------------------------------------------------------------
# source after import and before filtering
# WF[1] indexes the first step of the workflow, so predicted() here shows the
# output of that step alone
predicted(WF[1])

## -----------------------------------------------------------------------------
# prepare source object
# system.file() resolves the example file shipped inside the MetMashR package
# (the path is a single literal, so no paste0() is needed)
AT <- ls_source(
    source = system.file(
        "extdata/MTox/LS/MTox_2023_HILIC_POS.txt",
        package = "MetMashR"
    )
)

# prepare cache
# the cache is an rds_database whose source is a fresh temporary file path
TF <- rds_database(
    source = tempfile()
)

# prepare workflow
# steps are chained with `+` in the order they are listed
WF <-
    # step 1 import source from file
    import_source() +
    # step 2 filter the "Grade" column to only include "A" and "B"
    filter_labels(
        column_name = "Grade",
        labels = c("A", "B"),
        mode = "include"
    ) +
    # step 3 query lipidmaps api for inchikey
    # (lookups are made on the "LipidName" column and use TF as the cache)
    lipidmaps_lookup(
        query_column = "LipidName",
        context = "compound",
        context_item = "abbrev",
        output_item = "inchi_key",
        cache = TF,
        suffix = ""
    )

# apply workflow to annotation source
WF <- model_apply(WF, AT)

# show
# predicted() retrieves the output of the applied workflow
predicted(WF)

## -----------------------------------------------------------------------------
# read the cache source back in
TF <- read_source(TF)

# filter records with no inchikey: build the filter_na step on the
# "inchi_key" column and apply it to the cache in one expression
FI <- model_apply(filter_na(column_name = "inchi_key"), TF)

# display the filtered records as a table
.DT(predicted(FI)$data)

## -----------------------------------------------------------------------------
# Dictionary of replacement rules: an unnamed list with one entry per rule.
# Every entry holds a `pattern`, its `replace` value and `fixed = TRUE`.
# The patterns and replacements are paired by position.
custom_dict <- unname(Map(
    function(pattern, replace) {
        list(pattern = pattern, replace = replace, fixed = TRUE)
    },
    c("AcCa", "AEA", "_"),
    c("CAR", "NAE", "/")
))

## -----------------------------------------------------------------------------
# prepare workflow
# steps are chained with `+` in the order they are listed; the two lookup
# steps write to differently suffixed columns so their results can be compared
WF <-
    # step 1 import source from file
    import_source() +
    # step 2 filter the "Grade" column to only include "A" and "B"
    filter_labels(
        column_name = "Grade",
        labels = c("A", "B"),
        mode = "include"
    ) +
    # step 3 normalise lipid names using the custom dictionary:
    normalise_strings(
        search_column = "LipidName",
        output_column = "normalised_name",
        dictionary = custom_dict
    ) +
    # step 4 query lipidmaps api for inchikey using the names provided by
    # LipidSearch
    lipidmaps_lookup(
        query_column = "LipidName",
        context = "compound",
        context_item = "abbrev",
        output_item = "inchi_key",
        suffix = "_LipidName",
        cache = TF
    ) +
    # step 5 query lipidmaps api for inchikey using the normalised names
    # created in step 3
    # NOTE(review): unlike step 4 this lookup does not pass `cache = TF` --
    # confirm whether that is intentional
    lipidmaps_lookup(
        query_column = "normalised_name",
        context = "compound",
        context_item = "abbrev",
        output_item = "inchi_key",
        suffix = "_normalised"
    )

# apply workflow to annotation source
WF <- model_apply(WF, AT)

# show result table for relevant columns
.DT(predicted(WF)$data[, c(
    "LipidName", "normalised_name",
    "inchi_key_LipidName", "inchi_key_normalised"
)])

## -----------------------------------------------------------------------------
# build the combine_records step (grouping on "LipidName") and apply it to
# the output of the previous workflow in one expression
CR <- model_apply(
    combine_records(
        group_by = "LipidName",
        # function used when no specific one is listed in `fcns`
        default_fcn = fuse_unique(separator = "; "),
        # named entry "count" uses count_records()
        fcns = list(
            count = count_records()
        )
    ),
    predicted(WF)
)

# display only the columns of interest from the combined table
.DT(predicted(CR)$data[, c(
    "LipidName", "normalised_name",
    "inchi_key_normalised", "count"
)])

## -----------------------------------------------------------------------------
# print the R session details (R version, platform, packages) for reproducibility
sessionInfo()