download code
## Chunk 1
library("Biobase")
library("genefilter")
library("cluster")
library("hgu95av2.db")
library("annotate")
library("MASS")
library("hopach")
library("kohonen")
library("RColorBrewer")
## Chunk 2
library("ALL")
data(ALL)
bcell = grep("^B", as.character(ALL$BT))
moltyp = which(as.character(ALL$mol.biol)
    %in% c("NEG", "BCR/ABL"))
ALL_bcrneg = ALL[, intersect(bcell, moltyp)]
ALL_bcrneg$mol.biol = factor(ALL_bcrneg$mol.biol)
ALLfilt_bcrneg = nsFilter(ALL_bcrneg, var.cutoff=0.75)$eset
## Chunk 3
GOTFfun = function(GOID) {
    x = hgu95av2GO2ALLPROBES[[GOID]]
    unique(x[ names(x) != "IEA"])
}
GOIDs = c("GO:0003700", "GO:0003702", "GO:0003709", 
    "GO:0016563", "GO:0016564")
TFs = unique(unlist(lapply(GOIDs, GOTFfun)))
inSel = match(TFs, featureNames(ALLfilt_bcrneg), nomatch=0)
es2 = ALLfilt_bcrneg[inSel,]
## Chunk 4
iqrs = esApply(es2, 1, IQR)
gvals = scale(t(exprs(es2)), rowMedians(es2), 
    iqrs[featureNames(es2)])
manDist = dist(gvals, method="manhattan")
hmcol = colorRampPalette(brewer.pal(10, "RdBu"))(256)
hmcol = rev(hmcol)
heatmap(as.matrix(manDist), sym=TRUE,  col=hmcol,
    distfun=function(x) as.dist(x))
## Chunk 5
cols = ifelse(es2$mol.biol == "BCR/ABL", "black", 
    "goldenrod")
sam1 = sammon(manDist, trace=FALSE)
plot(sam1$points, col=cols, xlab="Dimension 1", 
    ylab="Dimension 2")
  (solution chunk)
  (solution chunk)
  (solution chunk)
## Chunk 9
mD = as.matrix(manDist)
silEst = silcheck(mD, diss=TRUE)
silEst
mD = as.hdist(mD)
mssEst = msscheck(mD)
mssEst
d2 = as.matrix(dist(t(gvals), method="man"))
silEstG = silcheck(d2, diss=TRUE)
silEstG
mssEstG = msscheck(as.hdist(d2))
mssEstG
  (solution chunk)
## Chunk 11
hc1 = hclust(manDist)
hc2 = hclust(manDist, method="single")
hc3 = hclust(manDist, method="ward")
hc4 = diana(manDist)
## Chunk 12
par(mfrow=c(4,1))
plot(hc1, ann=FALSE) 
title(main="Complete Linkage", cex.main=2)
plot(hc2, ann=FALSE)
title(main="Single Linkage", cex.main=2)
plot(hc3, ann=FALSE)
title(main="Ward's Method", cex.main=2)
plot(hc4, ann=FALSE, which.plots=2)
title(main="Divisive Clustering", cex.main=2)
par(mfrow=c(1,1))
  (solution chunk)
  (solution chunk)
## Chunk 15
cph1 = cophenetic(hc1)
cor1 = cor(manDist, cph1)
cor1
plot(manDist, cph1, pch="|", col="blue")
  (solution chunk)
## Chunk 17
km2 = kmeans(gvals, centers=2, nstart=5)
kmx = kmeans(gvals, centers=2, nstart=25)
  (solution chunk)
  (solution chunk)
  (solution chunk)
## Chunk 21
pam2 = pam(manDist, k=2, diss=TRUE)
pam3 = pam(manDist, k=3, diss=TRUE)
## Chunk 22
all(names(km2$cluster) == names(pam2$clustering)) 
pam2km = table(km2$cluster, pam2$clustering)
pam2km
  (solution chunk)
  (solution chunk)
  (solution chunk)
## Chunk 26
set.seed(123)
s1 = som(gvals, grid=somgrid(4,4))
names(s1)
s2 = som(gvals, grid=somgrid(4,4), alpha=c(1,0.1), 
    rlen=1000)
s3 = som(gvals, grid=somgrid(4,4, topo="hexagonal"), 
    alpha=c(1,0.1), rlen=1000)
whGP = table(s3$unit.classif)
whGP
  (solution chunk)
  (solution chunk)
## Chunk 29
set.seed(777)
s4 = SOM(gvals, grid=somgrid(4,4, topo="hexagonal"))
SOMgp = knn1(s4$code, gvals, 1:nrow(s4$code))
table(SOMgp)
## Chunk 30
cD = dist(s4$code)
cD
## Chunk 31
newCodes = s4$code[-c(2,5,6,10, 15, 9, 11, 13, 14),]
SOMgp2 = knn1(newCodes, gvals, 1:nrow(newCodes))
names(SOMgp2) = row.names(gvals)
table(SOMgp2)
cD2 = dist(newCodes)
cmdSOM = cmdscale(cD2)
#plot(cmdSOM)
  (solution chunk)
  (solution chunk)
## Chunk 34
  samp.hobj = hopach(gvals, dmat = manDist)
  samp.hobj$clust$k
## Chunk 35
samp.hobj$clust$sizes
## Chunk 36
gene.dist = distancematrix(t(gvals), d = "cosangle") 
gene.hobj = hopach(t(gvals), dmat = gene.dist) 
gene.hobj$clust$k
## Chunk 37
gene.hobj$clust$sizes
## Chunk 38
silpam2 = silhouette(pam2)
## Chunk 39
plot(silpam2, main="")
## Chunk 40
silpam3 = silhouette(pam3)
## Chunk 41
plot(silpam3, main="")
## Chunk 42
silpam3[silpam3[,"sil_width"] < 0,]
  (solution chunk)
  (solution chunk)
## Chunk 45
rtt = rowttests(ALLfilt_bcrneg, "mol.biol")
ordtt = order(rtt$p.value)
esTT = ALLfilt_bcrneg[ordtt[1:50],]
## Chunk 46
pairs(t(exprs(esTT)[1:5,]),
    col=ifelse(esTT$mol.biol=="NEG", "green", "blue"))
## Chunk 47
pc = prcomp(t(exprs(esTT)))
## Chunk 48
pairs(pc$x[,1:5], col=ifelse(esTT$mol.biol=="NEG", 
    "green", "blue"))
## Chunk 49
biplot(pc)
  (solution chunk)
## Chunk 51
## esTT.K = ALLfilt_bcrneg[ordtt[1:K],]