## download code
## Chunk 1
library("BiocCaseStudies")
## Set the "width" option (line width used when printing) to 56.
options(width=56)
## Chunk 2
library("Biobase")
library("RColorBrewer")
library("bioDist")
library("genefilter")
library("class")
library("MLInterfaces")
library("hgu95av2.db")
library("annotate")
library("randomForest")
## Chunk 3
library("ALL")
data(ALL)
## Indices of the samples whose BT label starts with "B".
bcell = grep("^B", as.character(ALL$BT))
## Indices of the samples whose mol.biol value is "NEG" or "BCR/ABL".
moltyp = which(as.character(ALL$mol.biol) 
    %in% c("NEG", "BCR/ABL"))
## Keep only the columns (samples) that satisfy both criteria.
ALL_bcrneg = ALL[, intersect(bcell, moltyp)]
## Rebuild the factor so that levels no longer present are dropped.
ALL_bcrneg$mol.biol = factor(ALL_bcrneg$mol.biol)
## (solution chunk)
## Chunk 5
## Filter ALL_bcrneg with nsFilter() using var.cutoff=0.75 and keep the
## filtered data set returned in the $eset component.
ALLfilt_bcrneg = nsFilter(ALL_bcrneg, var.cutoff=0.75)$eset
## (solution chunk)
## Chunk 7
## Row-wise spread measured as the difference between two row order
## statistics obtained with rowQ().
##   eSet: an object accepted by both ncol() and rowQ().
## Returns rowQ() at rank ceiling(0.75 * ncol) minus rowQ() at rank
## floor(0.25 * ncol).
rowIQRs = function(eSet) {
    nObs = ncol(eSet)
    ## Ranks of the order statistics standing in for the quartiles.
    ## Note that floor(0.25 * nObs) is 0 when there are fewer than four
    ## columns.
    rankLow = floor(0.25 * nObs)
    rankHigh = ceiling(0.75 * nObs)
    rowQ(eSet, rankHigh) - rowQ(eSet, rankLow)
}
## Chunk 8
## Center every row of x on its row median and divide by the row spread
## returned by rowIQRs().
standardize = function(x) {
    centered = x - rowMedians(x)
    centered / rowIQRs(x)
}
## Overwrite the expression values with their standardized version.
exprs(ALLfilt_bcrneg) = standardize(exprs(ALLfilt_bcrneg))
## Chunk 9
## dist() works on rows, so the expression matrix is transposed to obtain
## distances between columns (samples); dist() defaults to Euclidean.
eucD = dist(t(exprs(ALLfilt_bcrneg)))
## Full square matrix form of the distance object.
eucM = as.matrix(eucD)
dim(eucM)
## Chunk 10
library("RColorBrewer")
## Interpolate the 10-color "RdBu" Brewer palette to 256 colors and
## reverse its order.
hmcol = colorRampPalette(brewer.pal(10, "RdBu"))(256)
hmcol = rev(hmcol)
## eucM already holds distances, so distfun=as.dist only converts it
## instead of recomputing distances between its rows.
heatmap(eucM, sym=TRUE, col=hmcol, distfun=as.dist)
## (solution chunk)
## (solution chunk)
## Chunk 13
## Query the distance matrix eucM for sample "03002" with top = 1
## (presumably its single nearest neighbour -- see bioDist::closest.top).
closest.top("03002", eucM, 1)
## (solution chunk)
## Chunk 15
 Negs = which(ALLfilt_bcrneg$mol.biol == "NEG")
 Bcr = which(ALLfilt_bcrneg$mol.biol == "BCR/ABL")
 S1 = sample(Negs, 20, replace=FALSE)
 S2 = sample(Bcr, 20, replace = FALSE)
 TrainInd = c(S1, S2)
 TestInd = setdiff(1:79, TrainInd)
## (solution chunk)
## Chunk 17
## Row-wise t-tests on the training samples only, grouped by mol.biol.
Traintt = rowttests(ALLfilt_bcrneg[, TrainInd], "mol.biol")
## Rank the features by decreasing absolute t-statistic.
ordTT = order(abs(Traintt$statistic), decreasing=TRUE) 
## Names of the 50 top-ranked features.
fNtt = featureNames(ALLfilt_bcrneg)[ordTT[1:50]]
## (solution chunk)
## Chunk 19
## Restrict the data to the features selected in fNtt.
BNf = ALLfilt_bcrneg[fNtt,]
## Fit a k = 1 nearest-neighbour classifier for mol.biol through MLearn(),
## passing TrainInd as the training sample indices.
knnf = MLearn( mol.biol ~ ., data=BNf, knnI(k=1,l=0), 
    TrainInd)
## Confusion matrix of the fitted classifier.
 confuMat(knnf)
## (solution chunk)
## Chunk 21
## Work with the first 1000 features only.
BNx = ALLfilt_bcrneg[1:1000,]
## Chunk 22
## k = 1 nearest-neighbour classification through the knn.cvI interface,
## with every sample listed as a training index.
knnXval1 = MLearn(mol.biol~., data=BNx, knn.cvI(k=1, l=0),
    trainInd=1:ncol(BNx)) 
## Chunk 23
## knnXval1 = MLearn(mol.biol~., data=BNx, knnI(k=1, l=0),
##     xvalSpec("LOO")) 
## (solution chunk)
## Chunk 25
## Confusion matrix of the cross-validated k = 1 classifier.
confuMat(knnXval1)
## Chunk 26
## Leave-one-out cross-validation ("LOO") with feature selection by
## fs.absT(50) passed as fsFun.
## NOTE(review): the name lk3f1 suggests k = 3, but k = 1 is used here.
lk3f1 = MLearn(mol.biol~., data=BNx, knnI(k=1),
    xvalSpec("LOO", fsFun=fs.absT(50)))
confuMat(lk3f1)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## Chunk 31
library("randomForest")
## The seed is set once; rf2 below continues from the RNG state left
## behind by the rf1 fit.
set.seed(123)
## Random forest on all filtered features with TrainInd as training
## indices: 1000 trees, mtry=55, variable importance recorded.
rf1 = MLearn( mol.biol~., data=ALLfilt_bcrneg, 
    randomForestI, TrainInd, ntree=1000, mtry=55, 
    importance=TRUE) 
## Chunk 32
## Same call as rf1 except for mtry=10.
rf2 = MLearn( mol.biol~., data=ALLfilt_bcrneg, 
    randomForestI, TrainInd, ntree=1000, mtry=10, 
    importance=TRUE) 
## Chunk 33
## Class labels of the training samples (not referenced again in the
## chunks shown here).
trainY = ALLfilt_bcrneg$mol.biol[TrainInd]
## Confusion matrices of rf1, requested for "train" and "test".
confuMat(rf1, "train")
confuMat(rf1, "test")
## Chunk 34
## Same two confusion matrices for rf2.
confuMat(rf2, "train")
confuMat(rf2, "test")
## (solution chunk)
## (solution chunk)
## (solution chunk)
## Chunk 38
## Plot the 15 most important variables of rf1, labelled by gene symbol.
## par() returns the previous values of exactly the parameters it sets,
## so las and mar are set in one call; par(opar) then restores both.
## (Combining no.readonly=TRUE with mar=... returned only the old mar, so
## las=2 was never reset and leaked into every later plot.)
opar = par(las=2, mar=c(7,5,4,2))
impV1 = getVarImp(rf1)
plot(impV1, n=15, plat="hgu95av2", toktype="SYMBOL")
par(opar)
## Chunk 39
## Same plot for rf2; the previous settings are captured again so this
## chunk restores exactly what it changed.
opar = par(las=2, mar=c(7,5,4,2))
impV2 = getVarImp(rf2)
plot(impV2, n=15, plat="hgu95av2", toktype="SYMBOL")
par(opar)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## Chunk 46
## Indices of the samples whose BT label starts with "B".
Bcell = grep("^B", ALL$BT)
ALLs = ALL[,Bcell]
## Of those, keep only samples whose mol.biol value is one of the three
## listed types.
types = c("BCR/ABL", "NEG", "ALL1/AF4")
threeG = ALLs$mol.biol %in% types
ALL3g = ALLs[,threeG]
## Spread of x measured as the distance between its 10% and 90%
## quantiles.
##   x: numeric vector accepted by quantile().
## Returns the difference of the two quantiles, as produced by diff().
qrange <- function(x) {
    decileBounds = quantile(x, probs = c(0.1, 0.9))
    diff(decileBounds)
}
## Filter ALL3g with nsFilter() using var.cutoff=0.75 and qrange as the
## per-feature variability function; keep the $eset component.
ALL3gf = nsFilter(ALL3g, var.cutoff=0.75, 
    var.func=qrange)$eset
## Rebuild the factor so that levels no longer present are dropped.
ALL3gf$mol.biol = factor(ALL3gf$mol.biol)
## Chunk 47
## Stratified split: within each mol.biol class, half of the samples
## (rounded up) are drawn at random for training and the remainder are
## kept for testing.
s1 = table(ALL3gf$mol.biol)
trainN = ceiling(s1/2)
## Sample indices grouped by class.
sN = split(seq_along(ALL3gf$mol.biol), ALL3gf$mol.biol)
trainInd = NULL
testInd = NULL
set.seed(777)
## Loop over however many classes are present instead of assuming three.
for(i in seq_along(sN)) {
    ## Draw positions with sample.int() and index into the class: a bare
    ## sample(x, k) treats a length-one numeric x as the range 1:x. For
    ## classes with more than one member the draws are unchanged.
    trI = sN[[i]][sample.int(length(sN[[i]]), trainN[[i]])]
    teI = setdiff(sN[[i]], trI)
    trainInd = c(trainInd, trI)
    testInd = c(testInd, teI)
}
trainSet = ALL3gf[, trainInd]
testSet = ALL3gf[, testInd]
## (solution chunk)