## ----echo=FALSE---------------------------------------------------------------
library(BiocStyle)
fake <- "_alabaster.foo_"
knitr::opts_chunk$set(error=FALSE, warning=FALSE, message=FALSE)

## ----echo=FALSE, results="asis"-----------------------------------------------
stuff <- '{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "sparse_triplet_matrix/v1.json",
    "title": "Sparse Triplet Matrix",
    "type": "object",
    "description": "Sparse matrix in triplet form, as a demonstration of how to use a schema for my sparse matrix type. Data is stored in a CSV file with three columns - `i`, an integer field containing the 0-based row indices of each non-zero element; `j`, an integer field containing the column indices; and `x`, a double-precision field containing the value of the non-zero element.",
    "allOf": [
        { "$ref": "../array/v1.json" },
        { "$ref": "../_md5sum/v1.json" }
    ],
    "properties": {
        "sparse_triplet_matrix": {
            "type": "object",
            "properties": {
                "num_elements": {
                    "type": "integer",
                    "description": "Number of non-zero elements in the sparse matrix.",
                    "minimum": 0
                }
            },
            "required": [ "num_elements" ],
            "allOf": [ { "$ref": "../_compression/v1.json" } ]
        }
    },
    "required": [ "sparse_triplet_matrix" ],
    "_attributes": {
        "format": "text/csv",
        "restore": {
            "R": "alabaster.foo::loadSparseTripletMatrix"
        }
    }
}'
cat("```json", stuff, "```", sep="\n")
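## -----------------------------------------------------------------------------
# A minimal sketch, assuming a hypothetical 'alabaster.foo' package: the schema
# above would normally be shipped under the package's inst/schemas/ directory
# (i.e., inst/schemas/sparse_triplet_matrix/v1.json) so that writeMetadata() can
# retrieve it for validation via the "package" attribute set later. Here we only
# mimic that layout in a temporary directory.
schema.dir <- file.path(tempfile(), "schemas", "sparse_triplet_matrix")
dir.create(schema.dir, recursive=TRUE)
write(stuff, file=file.path(schema.dir, "v1.json"))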
## ----results="asis", echo=FALSE-----------------------------------------------
resolved <- '{
    "$id": "sparse_triplet_matrix/v1.json",
    "$schema": "http://json-schema.org/draft-07/schema",
    "_attributes": {
        "format": "text/csv",
        "restore": {
            "R": "alabaster.foo::loadSparseTripletMatrix"
        }
    },
    "description": "Sparse matrix in triplet form, as a demonstration of how to use a schema for my sparse matrix type. Data is stored in a CSV file with three columns - `i`, an integer field containing the 0-based row indices of each non-zero element; `j`, an integer field containing the column indices; and `x`, a double-precision field containing the value of the non-zero element.\\n\\nDerived from `array/v1.json`: some kind of multi-dimensional array, where we store metadata about the dimensions and type of data. The exact implementation of the array is left to concrete subclasses.",
    "properties": {
        "$schema": {
            "description": "The schema to use.",
            "type": "string"
        },
        "array": {
            "additionalProperties": false,
            "properties": {
                "dimensions": {
                    "description": "Dimensions of an n-dimensional array. Dimensions should be ordered from the fastest-changing to the slowest.",
                    "items": { "type": "integer" },
                    "minItems": 1,
                    "type": "array"
                },
                "type": {
                    "description": "Type of data stored in this array.",
                    "enum": [ "boolean", "number", "integer", "string", "other" ],
                    "type": "string"
                }
            },
            "required": [ "dimensions" ],
            "type": "object"
        },
        "is_child": {
            "default": false,
            "description": "Is this a child document, only to be interpreted in the context of the parent document from which it is linked? This may have implications for search and metadata requirements.",
            "type": "boolean"
        },
        "md5sum": {
            "description": "MD5 checksum for the file.",
            "type": "string"
        },
        "path": {
            "description": "Path to the file in the project directory.",
            "type": "string"
        },
        "sparse_triplet_matrix": {
            "properties": {
                "compression": {
                    "description": "Type of compression applied to the file.",
                    "enum": [ "none", "gzip", "bzip2" ],
                    "type": "string"
                },
                "num_elements": {
                    "description": "Number of non-zero elements in the sparse matrix.",
                    "minimum": 0,
                    "type": "integer"
                }
            },
            "required": [ "num_elements" ],
            "type": "object"
        }
    },
    "required": [
        "$schema",
        "array",
        "md5sum",
        "path",
        "sparse_triplet_matrix"
    ],
    "title": "Sparse Triplet Matrix",
    "type": "object"
}'
cat("```json", resolved, "```", sep="\n")

## -----------------------------------------------------------------------------
library(Matrix)
library(alabaster.base)
library(S4Vectors)

setMethod("stageObject", "dgTMatrix", function(x, dir, path, child=FALSE) {
    # Create a subdirectory to stash our contents.
    dir.create(file.path(dir, path), showWarnings=FALSE)

    # Create a DataFrame with the triplet data.
    df <- DataFrame(i = x@i, j = x@j, x = x@x)

    # .quickWriteCsv will make sure it's written in an 'alabaster-standard' format.
    outpath <- file.path(path, "foo.csv.gz")
    .quickWriteCsv(df, file.path(dir, outpath), compression="gzip")

    # Specify the package name in the 'package' attribute of the schema,
    # so that writeMetadata() can find it for validation.
    schema <- "sparse_triplet_matrix/v1.json"
    attr(schema, "package") <- "alabaster.foo"

    # Format the metadata for return.
    list(
        `$schema`=schema,

        # Reported path must be relative to 'dir'.
        path=outpath,

        # Pass along the 'child' specification from the call.
        is_child=child,

        `array`=list(
            # Need I() to prevent unboxing of length-1 vectors.
            dimensions=I(dim(x)),

            # double-precision values => 'number' in JSON schema's language.
            type="number"
        ),

        sparse_triplet_matrix=list(
            num_elements=nrow(df),
            compression="gzip"
        )
    )
})

## -----------------------------------------------------------------------------
x <- sparseMatrix(
    i=c(1,2,3,5,6),
    j=c(3,6,1,3,8),
    x=runif(5),
    dims=c(10, 10),
    repr="T"
)
x

tmp <- tempfile()
dir.create(tmp)
meta <- stageObject(x, tmp, "test")
str(meta)

## ----echo=FALSE, results="hide"-----------------------------------------------
meta$md5sum <- digest::digest(file.path(tmp, meta$path), file=TRUE)
meta.file <- jsonlite::toJSON(meta, auto_unbox=TRUE)

new.schema <- tempfile(fileext=".json")
write(file=new.schema, resolved)

jsonvalidate::json_validate(schema=new.schema, json=meta.file, engine="ajv", error=TRUE)

## -----------------------------------------------------------------------------
loadSparseTripletMatrix <- function(info, project) {
    # Need to get the file path.
    path <- acquireFile(project, info$path)

    # This utility will check that the CSV is correctly formatted,
    # which is more stringent than read.csv().
    df <- .quickReadCsv(path,
        expected.columns=c(i="integer", j="integer", x="double"),
        expected.nrows=info$sparse_triplet_matrix$num_elements,
        compression=info$sparse_triplet_matrix$compression,
        row.names=FALSE
    )

    # The constructor uses 1-based indices.
    sparseMatrix(
        i=df$i + 1L,
        j=df$j + 1L,
        x=df$x,
        dims=info$array$dimensions,
        repr="T"
    )
}

## -----------------------------------------------------------------------------
loadSparseTripletMatrix(meta, tmp)
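## -----------------------------------------------------------------------------
# A sketch of the full round trip through the generic machinery, assuming a real
# 'alabaster.foo' package is installed that ships the schema and the
# loadSparseTripletMatrix() function above. writeMetadata() would validate and
# save the metadata document, after which loadObject() dispatches to the
# "restore" function named in the schema's "_attributes". Not run here, as the
# package is hypothetical.
# info <- writeMetadata(meta, dir=tmp)
# roundtrip <- loadObject(acquireMetadata(tmp, info$path), tmp)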
## -----------------------------------------------------------------------------
# Typically in zzz.R.
.onLoad <- function(libname, pkgname) {
    existing <- getOption("alabaster.schema.locations")
    options(alabaster.schema.locations=union("alabaster.foo", existing))
}

.onUnload <- function(libpath) {
    existing <- getOption("alabaster.schema.locations")
    options(alabaster.schema.locations=setdiff(existing, "alabaster.foo"))
}

## -----------------------------------------------------------------------------
# Abbreviated example from artificer.se:
setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FALSE) {
    dir.create(file.path(dir, path), showWarnings=FALSE)

    # Saving the colData.
    info <- stageObject(colData(x), dir, file.path(path, "coldata"), child=TRUE)
    cd.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the rowData.
    info <- stageObject(rowData(x), dir, file.path(path, "rowdata"), child=TRUE)
    rd.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the other metadata.
    info <- stageObject(metadata(x), dir, file.path(path, "metadata"), child=TRUE)
    other.info <- list(resource=writeMetadata(info, dir=dir))

    # Saving the assays.
    ass.names <- assayNames(x)
    assay.info <- list()
    for (i in seq_along(ass.names)) {
        curmat <- assay(x, i)
        mat.path <- file.path(path, paste0("assay-", i))
        meta <- altStageObject(curmat, path=mat.path, dir=dir, child=TRUE)
        deets <- writeMetadata(meta, dir=dir)
        assay.info <- c(assay.info, list(list(name=ass.names[i], resource=deets)))
    }

    list(
        `$schema`="summarized_experiment/v1.json",
        path=file.path(path, "experiment.json"),
        summarized_experiment=list(
            assays=assay.info,
            column_data=cd.info,
            row_data=rd.info,
            other_data=other.info,
            dimensions=dim(x)
        ),
        is_child=child
    )
})

## -----------------------------------------------------------------------------
# Abbreviated example from artificer.se:
loadSummarizedExperiment <- function(exp.info, project) {
    # Restore each assay via the generic loader.
    all.assays <- list()
    for (y in seq_along(exp.info$summarized_experiment$assays)) {
        cur.ass <- exp.info$summarized_experiment$assays[[y]]
        aname <- cur.ass$name
        apath <- cur.ass$resource$path
        ass.info <- acquireMetadata(project, apath)
        all.assays[[aname]] <- altLoadObject(ass.info, project=project)
    }

    # Restore the column data, row data and other metadata.
    cd.info <- acquireMetadata(project, exp.info$summarized_experiment$column_data$resource$path)
    cd <- altLoadObject(cd.info, project=project)

    rd.info <- acquireMetadata(project, exp.info$summarized_experiment$row_data$resource$path)
    rd <- altLoadObject(rd.info, project=project)

    other.info <- acquireMetadata(project, exp.info$summarized_experiment$other_data$resource$path)
    other <- altLoadObject(other.info, project=project)

    SummarizedExperiment(all.assays, colData=cd, rowData=rd, metadata=other, checkDimnames=FALSE)
}

## -----------------------------------------------------------------------------
sessionInfo()