## -----------------------------------------------------------------------------
library(base64enc)
library(ggplot2)  # would shadow Biobase::exprs

suppressPackageStartupMessages({
    library(readxl)
    library(destiny)
    library(Biobase)
})

## -----------------------------------------------------------------------------
library(readxl)
raw_ct <- read_xls('mmc4.xls', 'Sheet1')

raw_ct[1:9, 1:9]  #preview of a few rows and columns

## -----------------------------------------------------------------------------
library(destiny)
library(Biobase)

# as.data.frame because ExpressionSets
# do not support readxl’s “tibbles”
ct <- as.ExpressionSet(as.data.frame(raw_ct))
ct

## -----------------------------------------------------------------------------
num_cells <- gsub('^(\\d+)C.*$', '\\1', ct$Cell)
ct$num_cells <- as.integer(num_cells)

## -----------------------------------------------------------------------------
# cells from 2+ cell embryos
have_duplications <- ct$num_cells > 1
# cells with values <= 28
normal_vals <- apply(exprs(ct), 2, function(smp) all(smp <= 28))

## -----------------------------------------------------------------------------
cleaned_ct <- ct[, have_duplications & normal_vals]

## -----------------------------------------------------------------------------
housekeepers <- c('Actb', 'Gapdh')  # houskeeper gene names

normalizations <- colMeans(exprs(cleaned_ct)[housekeepers, ])

guo_norm <- cleaned_ct
exprs(guo_norm) <- exprs(guo_norm) - normalizations

## -----------------------------------------------------------------------------
library(destiny)
#If you start here, run: data(guo_norm)
dm <- DiffusionMap(guo_norm)

## -----------------------------------------------------------------------------
plot(dm)

## -----------------------------------------------------------------------------
palette(cube_helix(6)) # configure color palette

plot(dm,
    pch = 20,             # pch for prettier points
    col_by = 'num_cells', # or “col” with a vector or one color
    legend_main = 'Cell stage')

## -----------------------------------------------------------------------------
plot(dm, 1:2,
    col_by = 'num_cells',
    legend_main = 'Cell stage')

## -----------------------------------------------------------------------------
library(rgl)
plot3d(
    eigenvectors(dm)[, 1:3],
    col = log2(guo_norm$num_cells),
    type = 's', radius = .01)
view3d(theta = 10, phi = 30, zoom = .8)
# now use your mouse to rotate the plot in the window
close3d()

## -----------------------------------------------------------------------------
library(ggplot2)
ggplot(dm, aes(DC1, DC2, colour = factor(num_cells))) +
    geom_point() + scale_color_cube_helix()

## -----------------------------------------------------------------------------
ggplot(NULL, aes(x = seq_along(eigenvalues(dm)), y = eigenvalues(dm))) + theme_minimal() +
    geom_point() + labs(x = 'Diffusion component (DC)', y = 'Eigenvalue')

## -----------------------------------------------------------------------------
plot(dm, 3:4,   col_by = 'num_cells', draw_legend = FALSE)
plot(dm, 19:20, col_by = 'num_cells', draw_legend = FALSE)

## -----------------------------------------------------------------------------
hist(exprs(cleaned_ct)['Aqp3', ], breaks = 20,
     xlab = 'Ct of Aqp3', main = 'Histogram of Aqp3 Ct',
     col = palette()[[4]], border = 'white')

## -----------------------------------------------------------------------------
dilutions <- read_xls('mmc6.xls', 1L)
dilutions$Cell <- NULL  # remove annotation column

get_lod <- function(gene) gene[[max(which(gene != 28))]]

lods <- apply(dilutions, 2, get_lod)
lod <- ceiling(median(lods))
lod

## -----------------------------------------------------------------------------
lod_norm <- ceiling(median(lods) - mean(normalizations))
max_cycles_norm <- ceiling(40 - mean(normalizations))

list(lod_norm = lod_norm, max_cycles_norm = max_cycles_norm)

## -----------------------------------------------------------------------------
guo <- guo_norm
exprs(guo)[exprs(cleaned_ct) >= 28] <- lod_norm

## -----------------------------------------------------------------------------
thresh_dm <- DiffusionMap(guo,
    censor_val = lod_norm,
    censor_range = c(lod_norm, max_cycles_norm),
    verbose = FALSE)

plot(thresh_dm, 1:2, col_by = 'num_cells',
     legend_main = 'Cell stage')

## -----------------------------------------------------------------------------
# remove rows with divisionless cells
ct_w_missing <- ct[, ct$num_cells > 1L]
# and replace values larger than the baseline
exprs(ct_w_missing)[exprs(ct_w_missing) > 28] <- NA

## -----------------------------------------------------------------------------
housekeep <- colMeans(
    exprs(ct_w_missing)[housekeepers, ],
    na.rm = TRUE)

w_missing <- ct_w_missing
exprs(w_missing) <- exprs(w_missing) - housekeep

exprs(w_missing)[is.na(exprs(ct_w_missing))] <- lod_norm

## -----------------------------------------------------------------------------
dif_w_missing <- DiffusionMap(w_missing,
    censor_val = lod_norm,
    censor_range = c(lod_norm, max_cycles_norm),
    missing_range = c(1, 40),
    verbose = FALSE)

plot(dif_w_missing, 1:2, col_by = 'num_cells',
     legend_main = 'Cell stage')

## -----------------------------------------------------------------------------
ct64 <- guo[, guo$num_cells == 64]
dm64 <- DiffusionMap(ct64)

## -----------------------------------------------------------------------------
ct32 <- guo[, guo$num_cells == 32]
pred32 <- dm_predict(dm64, ct32)

## -----------------------------------------------------------------------------
plot(dm64,    1:2,     col     = palette()[[6]],
     new_dcs = pred32, col_new = palette()[[4]])