## ----echo=FALSE---------------------------------------------------------------
library(BiocStyle)
fake <- "_alabaster.foo_"
knitr::opts_chunk$set(error=FALSE, warning=FALSE, message=FALSE)

## ----echo=FALSE, results="asis"-----------------------------------------------
stuff <- '{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "sparse_triplet_matrix/v1.json",
    "title": "Sparse Triplet Matrix",
    "type": "object",
    "description": "Sparse matrix in triplet form, as a demonstration of how to use a schema for my sparse matrix type. Data is stored in a CSV file with three columns - `i`, an integer field containing the 0-based row indices of each non-zero element; `j`, an integer field containing the column indices; and `x`, a double-precision field containing the value of the non-zero element.",
    "allOf": [
        { "$ref": "../array/v1.json" },
        { "$ref": "../_md5sum/v1.json" }
    ],
    "properties": {
        "sparse_triplet_matrix": {
            "type": "object",
            "properties": {
                "num_elements": {
                    "type": "integer",
                    "description": "Number of non-zero elements in the sparse matrix.",
                    "minimum": 0
                }
            },
            "required": [ "num_elements" ],
            "allOf": [ { "$ref": "../_compression/v1.json" } ]
        }
    },
    "required": [ "sparse_triplet_matrix" ],
    "_attributes": {
        "format": "text/csv",
        "restore": {
            "R": "alabaster.foo::loadSparseTripletMatrix"
        }
    }
}'
cat("```json", stuff, "```", sep="\n")
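## -----------------------------------------------------------------------------
# A minimal sketch, assuming a hypothetical 'alabaster.foo' package: the schema
# above would normally be shipped under the package's inst/schemas/ directory
# (i.e., inst/schemas/sparse_triplet_matrix/v1.json) so that writeMetadata() can
# retrieve it for validation via the "package" attribute set later. Here we only
# mimic that layout in a temporary directory.
schema.dir <- file.path(tempfile(), "schemas", "sparse_triplet_matrix")
dir.create(schema.dir, recursive=TRUE)
write(stuff, file=file.path(schema.dir, "v1.json"))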
## ----results="asis", echo=FALSE-----------------------------------------------
resolved <- '{
    "$id": "sparse_triplet_matrix/v1.json",
    "$schema": "http://json-schema.org/draft-07/schema",
    "_attributes": {
        "format": "text/csv",
        "restore": {
            "R": "alabaster.foo::loadSparseTripletMatrix"
        }
    },
    "description": "Sparse matrix in triplet form, as a demonstration of how to use a schema for my sparse matrix type. Data is stored in a CSV file with three columns - `i`, an integer field containing the 0-based row indices of each non-zero element; `j`, an integer field containing the column indices; and `x`, a double-precision field containing the value of the non-zero element.\\n\\nDerived from `array/v1.json`: some kind of multi-dimensional array, where we store metadata about the dimensions and type of data. The exact implementation of the array is left to concrete subclasses.",
    "properties": {
        "$schema": {
            "description": "The schema to use.",
            "type": "string"
        },
        "array": {
            "additionalProperties": false,
            "properties": {
                "dimensions": {
                    "description": "Dimensions of an n-dimensional array. Dimensions should be ordered from the fastest-changing to the slowest.",
                    "items": { "type": "integer" },
                    "minItems": 1,
                    "type": "array"
                },
                "type": {
                    "description": "Type of data stored in this array.",
                    "enum": [ "boolean", "number", "integer", "string", "other" ],
                    "type": "string"
                }
            },
            "required": [ "dimensions" ],
            "type": "object"
        },
        "is_child": {
            "default": false,
            "description": "Is this a child document, only to be interpreted in the context of the parent document from which it is linked? This may have implications for search and metadata requirements.",
            "type": "boolean"
        },
        "md5sum": {
            "description": "MD5 checksum for the file.",
            "type": "string"
        },
        "path": {
            "description": "Path to the file in the project directory.",
            "type": "string"
        },
        "sparse_triplet_matrix": {
            "properties": {
                "compression": {
                    "description": "Type of compression applied to the file.",
                    "enum": [ "none", "gzip", "bzip2" ],
                    "type": "string"
                },
                "num_elements": {
                    "description": "Number of non-zero elements in the sparse matrix.",
                    "minimum": 0,
                    "type": "integer"
                }
            },
            "required": [ "num_elements" ],
            "type": "object"
        }
    },
    "required": [
        "$schema",
        "array",
        "md5sum",
        "path",
        "sparse_triplet_matrix"
    ],
    "title": "Sparse Triplet Matrix",
    "type": "object"
}'
cat("```json", resolved, "```", sep="\n")

## -----------------------------------------------------------------------------
library(Matrix)
library(alabaster.base)
library(S4Vectors)

setMethod("stageObject", "dgTMatrix", function(x, dir, path, child=FALSE) {
    # Create a subdirectory to stash our contents.
    dir.create(file.path(dir, path), showWarnings=FALSE)

    # Create a DataFrame with the triplet data.
    df <- DataFrame(i = x@i, j = x@j, x = x@x)

    # .quickWriteCsv will make sure it's written in an 'alabaster-standard' format.
    outpath <- file.path(path, "foo.csv.gz")
    .quickWriteCsv(df, file.path(dir, outpath), compression="gzip")

    # Specify the package name in the 'package' attribute of the schema,
    # so that writeMetadata() can find it for validation.
    schema <- "sparse_triplet_matrix/v1.json"
    attr(schema, "package") <- "alabaster.foo"

    # Format the metadata for return.
    list(
        `$schema`=schema,

        # Reported path must be relative to 'dir'.
        path=outpath,

        # Pass along the 'child' specification from the call.
        is_child=child,

        `array`=list(
            # Need I() to prevent unboxing of length-1 vectors.
            dimensions=I(dim(x)),

            # double-precision values => 'number' in JSON schema's language.
            type="number"
        ),

        sparse_triplet_matrix=list(
            num_elements=nrow(df),
            compression="gzip"
        )
    )
})

## -----------------------------------------------------------------------------
x <- sparseMatrix(
    i=c(1,2,3,5,6),
    j=c(3,6,1,3,8),
    x=runif(5),
    dims=c(10, 10),
    repr="T"
)
x

tmp <- tempfile()
dir.create(tmp)
meta <- stageObject(x, tmp, "test")
str(meta)

## ----echo=FALSE, results="hide"-----------------------------------------------
meta$md5sum <- digest::digest(file.path(tmp, meta$path), file=TRUE)
meta.file <- jsonlite::toJSON(meta, auto_unbox=TRUE)

new.schema <- tempfile(fileext=".json")
write(file=new.schema, resolved)

jsonvalidate::json_validate(schema=new.schema, json=meta.file, engine="ajv", error=TRUE)

## -----------------------------------------------------------------------------
loadSparseTripletMatrix <- function(info, project) {
    # Need to get the file path.
    path <- acquireFile(project, info$path)

    # This utility will check that the CSV is correctly formatted,
    # which is more stringent than read.csv().
    df <- .quickReadCsv(path,
        expected.columns=c(i="integer", j="integer", x="double"),
        expected.nrows=info$sparse_triplet_matrix$num_elements,
        compression=info$sparse_triplet_matrix$compression,
        row.names=FALSE
    )

    # The constructor uses 1-based indices.
    sparseMatrix(
        i=df$i + 1L,
        j=df$j + 1L,
        x=df$x,
        dims=info$array$dimensions,
        repr="T"
    )
}

## -----------------------------------------------------------------------------
loadSparseTripletMatrix(meta, tmp)
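## -----------------------------------------------------------------------------
# A sketch of the full round trip through the generic machinery, assuming a real
# 'alabaster.foo' package is installed that ships the schema and the
# loadSparseTripletMatrix() function above. writeMetadata() would validate and
# save the metadata document, after which loadObject() dispatches to the
# "restore" function named in the schema's "_attributes". Not run here, as the
# package is hypothetical.
# info <- writeMetadata(meta, dir=tmp)
# roundtrip <- loadObject(acquireMetadata(tmp, info$path), tmp)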
## -----------------------------------------------------------------------------
# Typically in zzz.R.
.onLoad <- function(libname, pkgname) {
    existing <- getOption("alabaster.schema.locations")
    options(alabaster.schema.locations=union("alabaster.foo", existing))
}

.onUnload <- function(libpath) {
    existing <- getOption("alabaster.schema.locations")
    options(alabaster.schema.locations=setdiff(existing, "alabaster.foo"))
}

## -----------------------------------------------------------------------------
# Abbreviated example from artificer.se:
setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FALSE) {
    dir.create(file.path(dir, path), showWarnings=FALSE)

    # Saving the colData.
    info <- stageObject(colData(x), dir, file.path(path, "coldata"), child=TRUE)
    cd.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the rowData.
    info <- stageObject(rowData(x), dir, file.path(path, "rowdata"), child=TRUE)
    rd.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the other metadata.
    info <- stageObject(metadata(x), dir, file.path(path, "metadata"), child=TRUE)
    other.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the assays.
    ass.names <- assayNames(x)
    assay.info <- list()
    for (i in seq_along(ass.names)) {
        curmat <- assay(x, i)
        mat.path <- file.path(path, paste0("assay-", i))
        meta <- altStageObject(curmat, path=mat.path, dir=dir, child=TRUE)
        deets <- writeMetadata(meta, dir=dir)
        assay.info <- c(assay.info, list(list(name=ass.names[i], resource=deets)))
    }

    list(
        `$schema`="summarized_experiment/v1.json",
        path=file.path(path, "experiment.json"),
        summarized_experiment=list(
            assays=assay.info,
            column_data=cd.info,
            row_data=rd.info,
            other_data=other.info,
            dimensions=dim(x)
        ),
        is_child=child
    )
})

## -----------------------------------------------------------------------------
# Abbreviated example from artificer.se:
loadSummarizedExperiment <- function(exp.info, project) {
    # Restore each assay via the generic loader.
    all.assays <- list()
    for (y in seq_along(exp.info$summarized_experiment$assays)) {
        cur.ass <- exp.info$summarized_experiment$assays[[y]]
        aname <- cur.ass$name
        apath <- cur.ass$resource$path
        ass.info <- acquireMetadata(project, apath)
        all.assays[[aname]] <- altLoadObject(ass.info, project=project)
    }

    # Restore the column data, row data and other metadata.
    cd.info <- acquireMetadata(project, exp.info$summarized_experiment$column_data$resource$path)
    cd <- altLoadObject(cd.info, project=project)

    rd.info <- acquireMetadata(project, exp.info$summarized_experiment$row_data$resource$path)
    rd <- altLoadObject(rd.info, project=project)

    other.info <- acquireMetadata(project, exp.info$summarized_experiment$other_data$resource$path)
    other <- altLoadObject(other.info, project=project)

    SummarizedExperiment(all.assays, colData=cd, rowData=rd, metadata=other, checkDimnames=FALSE)
}

## -----------------------------------------------------------------------------
sessionInfo()