## ----options, results="hide", include=FALSE, cache=FALSE, message=FALSE---- knitr::opts_chunk$set(fig.align="center", cache=TRUE,error=FALSE, #stop on error fig.width=5, fig.height=5, autodep=TRUE, out.width="600px", out.height="600px", results="markup", echo=TRUE, eval=TRUE) #knitr::opts_knit$set(stop_on_error = 2L) #really make it stop #knitr::dep_auto() options(getClass.msg=FALSE) graphics:::par(pch = 16, las = 1) set.seed(12345) ## for reproducibility library(slingshot, quietly = TRUE) ## ----dataSetup_splat, eval=FALSE------------------------------------------- # library(HSMMSingleCell, quietly = TRUE) # library(splatter, quietly = TRUE) # # # load data # data("HSMM_expr_matrix") # counts <- round(HSMM_expr_matrix) # # # estimate simulation parameters # params <- splatEstimate(counts) # # # set path-related parameters # params <- setParams(params, update = list(batchCells = 200, # path.nonlinearProb = .5, # de.prob = .6, de.facLoc = 1, # de.facScale = 2)) ## ----load_data_splat, echo = FALSE----------------------------------------- library(splatter, quietly = TRUE) data("splatterParameters") ## ----simulate_splat-------------------------------------------------------- # simulate sim <- splatSimulate(params, method = "paths") ## ----data_sling------------------------------------------------------------ library(slingshot, quietly = FALSE) data("slingshotExample") dim(rd) # data representing cells in a reduced dimensional space length(cl) # vector of cluster labels ## ----genefilt-------------------------------------------------------------- # filter genes down to potential cell-type markers # at least M (15) reads in at least N (15) cells geneFilter <- apply(assays(sim)$counts,1,function(x){ sum(x >= 15) >= 15 }) sim <- sim[geneFilter, ] ## ----norm------------------------------------------------------------------ FQnorm <- function(counts){ rk <- apply(counts,2,rank,ties.method='min') counts.sort <- apply(counts,2,sort) refdist <- apply(counts.sort,1,median) norm <- apply(rk,2,function(r){ refdist[r] }) rownames(norm) <- rownames(counts) return(norm) } assays(sim)$norm <- FQnorm(assays(sim)$counts) ## ----pca, cache=TRUE------------------------------------------------------- pca <- prcomp(t(log1p(assays(sim)$norm)), scale. = FALSE) rd1 <- pca$x[,1:2] plot(rd1, col = topo.colors(100)[sim$Step], pch=16, asp = 1) ## ----dm, cache=TRUE-------------------------------------------------------- library(destiny, quietly = TRUE) dm <- DiffusionMap(t(log1p(assays(sim)$norm))) rd2 <- cbind(DC1 = dm$DC1, DC2 = dm$DC2) plot(rd2, col = topo.colors(100)[sim$Step], pch=16, asp = 1) ## ----add_RDs, cache=TRUE--------------------------------------------------- reducedDims(sim) <- SimpleList(PCA = rd1, DiffMap = rd2) ## ----clustering_mclust----------------------------------------------------- library(mclust, quietly = TRUE) cl1 <- Mclust(rd1)$classification colData(sim)$GMM <- cl1 library(RColorBrewer) plot(rd1, col = brewer.pal(9,"Set1")[cl1], pch=16, asp = 1) ## ----clustering------------------------------------------------------------ cl2 <- kmeans(rd1, centers = 4)$cluster colData(sim)$kmeans <- cl2 plot(rd1, col = brewer.pal(9,"Set1")[cl2], pch=16, asp = 1) ## ----sling_sce------------------------------------------------------------- sce <- slingshot(sim, clusterLabels = 'GMM', reducedDim = 'PCA') ## ----plot_curve_1---------------------------------------------------------- summary(sce$slingPseudotime_1) colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100) plot(reducedDims(sce)$PCA, col = colors[cut(sce$slingPseudotime_1,breaks=100)], pch=16, asp = 1) lines(SlingshotDataSet(sce), lwd=2) ## ----plot_curve_2---------------------------------------------------------- plot(reducedDims(sce)$PCA, col = brewer.pal(9,'Set1')[sce$GMM], pch=16, asp = 1) lines(SlingshotDataSet(sce), lwd=2, type = 'lineages') ## ----fitgam, cache = TRUE-------------------------------------------------- require(gam) t <- sce$slingPseudotime_1 # for time, only look at the 1,000 most variable genes Y <- log1p(assays(sim)$norm) var1K <- names(sort(apply(Y,1,var),decreasing = TRUE))[1:1000] Y <- Y[var1K,] # fit a GAM with a loess term for pseudotime gam.pval <- apply(Y,1,function(z){ d <- data.frame(z=z, t=t) tmp <- gam(z ~ lo(t), data=d) p <- summary(tmp)[4][[1]][1,5] p }) ## ----heatmaps-------------------------------------------------------------- require(clusterExperiment) topgenes <- names(sort(gam.pval, decreasing = FALSE))[1:100] heatdata <- assays(sim)$norm[rownames(assays(sim)$norm) %in% topgenes, order(t, na.last = NA)] heatclus <- sce$GMM[order(t, na.last = NA)] ce <- ClusterExperiment(heatdata, heatclus, transformation = log1p) plotHeatmap(ce, clusterSamplesData = "orderSamplesValue",visualizeData = 'transformed') ## ----sling_lines_unsup----------------------------------------------------- lin1 <- getLineages(rd, cl, start.clus = '1') lin1 plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(lin1, lwd = 3) ## ----lines_sup_end--------------------------------------------------------- lin2 <- getLineages(rd, cl, start.clus= '1', end.clus = '3') plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(lin2, lwd = 3, show.constraints = TRUE) ## ----curves---------------------------------------------------------------- crv1 <- getCurves(lin1) crv1 plot(rd, col = brewer.pal(9,"Set1")[cl], asp = 1, pch = 16) lines(crv1, lwd = 3) ## ----session--------------------------------------------------------------- sessionInfo()