################################################### ### chunk number 1: setup1 ################################################### library("Category") library("ALL") library("hgu95av2.db") library("annotate") library("genefilter") library("SNPchip") library("geneplotter") library("limma") library("lattice") library("graph") ################################################### ### chunk number 2: Setup2 ################################################### options(width=80) ## FIXME: this needs to go somewhere, not sure where ## it is a _slightly_ modified version from SNPchip ## RG asks if the slight mod might not be taken up by the SNPchip ## folks? Otherwise, geneplotter is the obvious place plotCytoband2 <- function(chromosome, cytoband, xlim, xaxs="r", new=TRUE, label.cytoband=TRUE, cex.axis=1, outer=FALSE, ...){ def.par <- par(no.readonly=TRUE, mar=c(4.1, 0.1, 3.1, 2.1)) on.exit(def.par) if(missing(cytoband)) data(cytoband, package="SNPchip", envir=environment()) if(missing(chromosome)){ if(length(unique(cytoband[, "chrom"])) > 1) stop("Must specify chromosome") } if(length(unique(cytoband$chrom)) > 1){ cytoband <- cytoband[cytoband[, "chrom"] == chromosome, ] } rownames(cytoband) <- as.character(cytoband[, "name"]) if(missing(xlim)) xlim <- c(0, chromosomeSize(unique(cytoband$chrom))) cytoband_p <- cytoband[grep("^p", rownames(cytoband), value=TRUE), ] cytoband_q <- cytoband[grep("^q", rownames(cytoband), value=TRUE), ] p.bands <- nrow(cytoband_p) cut.left <- c() cut.right <- c() ## 1st band of arm or 1st band after "stalk" ## last band of arm or last band before "stalk" for (i in 1:nrow(cytoband)) { if (i == 1) { cut.left[i] <- TRUE; cut.right[i] <- FALSE} else if (i == p.bands) { cut.left[i] <- FALSE; cut.right[i] <- TRUE} else if (i == (p.bands+1)) { cut.left[i] <- TRUE; cut.right[i] <- FALSE} else if (i == nrow(cytoband)) { cut.left[i] <- FALSE; cut.right[i] <- TRUE} else{ cut.left[i] <- FALSE; cut.right[i] <- FALSE } } for (i in 1:nrow(cytoband)) { if (as.character(cytoband[i, "gieStain"]) == "stalk") { cut.right[i-1] <- TRUE cut.left[i] <- NA cut.right[i] <- NA cut.left[i+1] <- TRUE } } ##When plotting subregions of a chromosome, this prevents the cytobands from extending beyond the subsetted object ##exclude cytobands that end before the minimum plotting limits include <- cytoband[, "chromEnd"] > xlim[1] & cytoband[, "chromStart"] < xlim[2] cytoband <- cytoband[include, ] cut.left <- cut.left[include] cut.right <- cut.right[include] if(new){ plot(c(0, cytoband[nrow(cytoband), "chromEnd"]), c(0, 2), xlim=xlim, type="n", xlab="", ylab="", axes=FALSE, xaxs=xaxs, ...) } for (i in 1:nrow(cytoband)) { start <- cytoband[i, "chromStart"] last <- cytoband[i, "chromEnd"] delta = (last-start)/4 getStain <- function(stain){ switch(stain, gneg="grey100", gpos25="grey90", gpos50="grey70", gpos75="grey40", gpos100="grey0", gvar="grey100", stalk="brown3", acen="brown4", "white") } color <- getStain(as.character(cytoband[i, "gieStain"])) if (is.na(cut.left[i]) & is.na(cut.right[i])) { ## this is a "stalk", do not draw box. Draw two vertical lines instead delta <- (last-start)/3 lines(c(start+delta, start+delta), c(0,2), col=color) lines(c(last-delta, last-delta), c(0,2), col=color) } else if (cut.left[i] & cut.right[i]) { # cut both lasts polygon(c(start, start+delta, last-delta, last, last, last-delta, start+delta, start), c(0.3, 0, 0, 0.3, 1.7, 2, 2, 1.7), col=color) } else if (cut.left[i]) { # cut left last only polygon(c(start, start+delta, last, last, start+delta, start), c(0.3, 0, 0, 2, 2, 1.7), col=color) } else if (cut.right[i]) { # cut right last only polygon(c(start, last-delta, last, last, last-delta, start), c(0, 0, 0.3, 1.7, 2, 2),col=color) } else { polygon(c(start, last, last, start), c(0, 0, 2, 2), col=color) } } my.x <- (cytoband$chromStart+cytoband$chromEnd)/2 if(label.cytoband){ axis(1, at=my.x, labels=rownames(cytoband), outer=outer, cex.axis=cex.axis, line=1, las=3, tick=FALSE) axis(1, at=cytoband$chromStart, outer=outer, cex.axis=cex.axis, line=1, las=3, label=FALSE) } } ## inducedSubGraph <- function(n, g) { ## inducedNodes <- unlist(lapply(acc(g, n), names)) ## inducedNodes <- unique(c(n, inducedNodes)) ## subGraph(inducedNodes, g) ## } ## FIXME: where doe these belong? Should they go in a package? ## sortBands <- function(bands) { ## chrs <- sub("([^pq]+).*$", "\\1", bands) ## xyIdx <- match(c("X", "Y"), chrs, 0) ## xy <- NULL ## if (any(xyIdx)) { ## chrs <- chrs[-xyIdx] ## xy <- bands[xyIdx] ## df <- bands[-xyIdx] ## } ## ord <- order(as.integer(chrs), bands) ## bands <- bands[ord] ## if (!is.null(xy)) ## bands <- c(bands, xy) ## bands ## } ## Note: Seth's old code had assumed that eset would (1) only have ## the genes that pass the t-test p-value threshold and (2) have ## gene symbols as featureNames(). (1) seems unreasonable, and we ## decided to include all genes. (2) seems like a convenience to ## have nice strip labels; we will do this another way. ## subsetByBand <- function(eset, ct, band) { ## ## eset - ExpressionSet (featureNames assumed to be probe id) ## ## ct - ChrBandTree object ## ## band - String giving the desired band ## ## ## ## RETURN: a new ExpressionSet with only those genes found in the ## ## specified band. ## ## syms <- unlist(mget(featureNames(pvalFiltered), symbolMap)) ## ## entrez2sym <- syms ## ## names(entrez2sym) <- selectedEntrezIds ## ## featureNames(pvalFiltered) <- syms ## egIDs <- unlist(nodeData(ct@toChildGraph, n=band, ## attr="geneIds"), use.names=FALSE) ## wantedProbes <- affyUniverse[as.character(egIDs)] ## ## print(wantedProbes) ## ## FIXME: shouldn't get any NA's... should investigate ## ## wantedSyms <- wantedSyms[!is.na(wantedSyms)] ## ## tmpSet <- eset[wantedSyms, ] ## ## tmpSet ## eset[intersect(wantedProbes, featureNames(eset)), ] ## } ## gseaTstatStripplot <- function(bands, g, ..., include.all = FALSE) ## { ## ## hack to get all top-level stuff. FIXME: more robust solution ## chroms <- c(1:22, "X", "Y") ## chromArms <- c(paste(chroms, "p", sep=""), ## paste(chroms, "q", sep="")) ## egid <- lapply(nodeData(g, bands), "[[", "geneIds") ## affyid <- ## lapply(egid, ## function(x) { ## affyUniverse[as.character(x)] ## }) ## if (include.all) ## { ## affyid$All <- ## affyUniverse[unique(unlist(lapply(nodeData(g)[chromArms], "[[", "geneIds")))] ## } ## tdf <- ## do.call(make.groups, ## lapply(affyid, ## function(x) { ## ttests[x, "statistic"] ## })) ## stripplot(which ~ data, tdf, jitter = TRUE, ## ...) ## } ## ################################################### ### chunk number 3: chr12ideogram ################################################### data(cytoband, package="SNPchip") cyt2 <- cytoband cyt2$gieStain <- "foo" c12p12_idx <- intersect(grep("^q21", cyt2$name), which(cyt2$chrom == "12")) cyt2[c12p12_idx, "gieStain"] <- rep(c("gpos50", "gpos75"), length=length(c12p12_idx)) plotCytoband2(chromosome="12", cyt2, outer=FALSE, cex.axis=0.6, main="Human chromosome 12") ################################################### ### chunk number 4: bcrAblOrNegSubset ################################################### data(ALL, package="ALL") subsetType <- "BCR/ABL" Bcell <- grep("^B", as.character(ALL$BT)) bcrAblOrNegIdx <- which(as.character(ALL$mol.biol) %in% c("NEG", subsetType)) bcrAblOrNeg <- ALL[, intersect(Bcell, bcrAblOrNegIdx)] bcrAblOrNeg$mol.biol <- factor(bcrAblOrNeg$mol.biol) ################################################### ### chunk number 5: annMaps ################################################### annType <- c("db", "env") entrezMap <- getAnnMap("ENTREZID", annotation(bcrAblOrNeg), type=annType, load=TRUE) symbolMap <- getAnnMap("SYMBOL", annotation(bcrAblOrNeg), type=annType, load=TRUE) bandMap <- getAnnMap("MAP", annotation(bcrAblOrNeg), type=annType, load=TRUE) ################################################### ### chunk number 6: nsFiltering ################################################### filterAns <- nsFilter(bcrAblOrNeg, require.entrez = TRUE, remove.dupEntrez = TRUE, var.func = IQR, var.cutoff = 0.5) nsFiltered <- filterAns$eset ################################################### ### chunk number 7: ################################################### hasSYM <- sapply(mget(featureNames(nsFiltered), symbolMap, ifnotfound=NA), function(x) length(x) > 0 && !is.na(x[1])) hasMAP <- sapply(mget(featureNames(nsFiltered), bandMap, ifnotfound=NA), function(x) length(x) > 0 && !is.na(x[1])) nsFiltered <- nsFiltered[hasSYM & hasMAP, ] ################################################### ### chunk number 8: defineGeneUniverse ################################################### affyUniverse <- featureNames(nsFiltered) entrezUniverse <- unlist(mget(affyUniverse, entrezMap)) names(affyUniverse) <- entrezUniverse if (any(duplicated(entrezUniverse))) stop("error in gene universe: can't have duplicate Entrez Gene Ids") ################################################### ### chunk number 9: parametric1 ################################################### design <- model.matrix(~ 0 + nsFiltered$mol.biol) colnames(design) <- c("BCR/ABL", "NEG") contr <- c(1, -1) ## NOTE: we thus have BCR/ABL w.r.t NEG fm1 <- lmFit(nsFiltered, design) fm2 <- contrasts.fit(fm1, contr) fm3 <- eBayes(fm2) ttestLimma <- topTable(fm3, number = nrow(fm3), adjust.method = "none") rownames(ttestLimma) <- as.character(ttestLimma$ID) ttestLimma <- ttestLimma[featureNames(nsFiltered), ] tstats <- ttestLimma$t names(tstats) <- entrezUniverse[rownames(ttestLimma)] ## ################################################### ### chunk number 10: selectedSubset ################################################### ttestCutoff <- 0.01 smPV <- ttestLimma$P.Value < ttestCutoff pvalFiltered <- nsFiltered[smPV, ] selectedEntrezIds <- unlist(mget(featureNames(pvalFiltered), entrezMap)) ## ################################################### ### chunk number 11: ################################################### chrSortOrder <- function(df) { chrs <- sub("([^pq]+).*$", "\\1", rownames(df)) xyIdx <- chrs %in% c("X", "Y") xydf <- NULL if (any(xyIdx)) { chrs <- chrs[!xyIdx] xydf <- df[xyIdx, ] df <- df[!xyIdx, ] } ord <- order(as.integer(chrs), rownames(df)) df <- df[ord, ] if (!is.null(xydf)) df <- rbind(df, xydf) df } ################################################### ### chunk number 12: ################################################### gseaTstatStripplot <- function(bands, g, ..., include.all = FALSE) { chroms <- c(1:22, "X", "Y") chromArms <- c(paste(chroms, "p", sep=""), paste(chroms, "q", sep="")) egid <- lapply(nodeData(g, bands), "[[", "geneIds") if (include.all) { egid$All <- unique(unlist(lapply(nodeData(g)[chromArms], "[[", "geneIds"))) } tdf <- do.call(make.groups, lapply(egid, function(x) tstats[x])) stripplot(which ~ data, tdf, jitter = TRUE, ...) } ################################################### ### chunk number 13: ################################################### esetBWPlot <- function(tmpSet, ..., layout=c(1, nrow(emat))) { emat <- exprs(tmpSet) pd <- pData(tmpSet) probes <- rownames(emat) syms <- sapply(mget(probes, hgu95av2SYMBOL, ifnotfound=NA), function(x) if (all(is.na(x))) "NA" else as.character(x)[1]) selectedAffy <- probes %in% affyUniverse[selectedEntrezIds] symsSelected <- syms[selectedAffy] symsWithStatus <- paste(syms, ifelse(selectedAffy, "*", ""), sep = "") pdat <- cbind(exprs=as.vector(emat), genes=factor(probes, levels = probes, labels = syms), pd[rep(seq_len(nrow(pd)), each=nrow(emat)), ]) pdat <- transform(pdat, genes = reorder(genes, exprs)) panels.to.shade <- levels(pdat$genes) %in% symsSelected bwplot(mol.biol ~ exprs | genes, data=pdat, layout = layout, auto.key=TRUE, scales=list(x=list(log=2L)), xlab="Log2 Expression", panels.to.shade = panels.to.shade, panel = function(..., panels.to.shade) { if (panels.to.shade[packet.number()]) panel.fill(col = "lightgrey") panel.bwplot(...) }, strip=FALSE, strip.left=TRUE, ...) } g1 <- makeChrBandGraph(annotation(nsFiltered), univ=entrezUniverse) ct <- ChrBandTreeFromGraph(g1) subsetByBand <- function(eset, ct, band) { egIDs <- unlist(nodeData(ct@toChildGraph, n=band, attr="geneIds"), use.names=FALSE) wantedProbes <- affyUniverse[as.character(egIDs)] eset[intersect(wantedProbes, featureNames(eset)), ] } ################################################### ### chunk number 14: basicParams ################################################### params <- new("ChrMapHyperGParams", conditional=FALSE, testDirection="over", universeGeneIds=entrezUniverse, geneIds=selectedEntrezIds, annotation="hgu95av2", pvalueCutoff=0.05) paramsCond <- params paramsCond@conditional <- TRUE ################################################### ### chunk number 15: basicTest ################################################### hgans <- hyperGTest(params) hgansCond <- hyperGTest(paramsCond) ################################################### ### chunk number 16: result1 ################################################### sumUn <- summary(hgans, categorySize=1) chrSortOrder(sumUn) sumCond <- summary(hgansCond, categorySize=1) chrSortOrder(sumCond) ################################################### ### chunk number 17: ################################################### gseaTstatStripplot(c("12q21.1", "12q21", "12q2", "12q"), include.all = TRUE, g = g1, xlab = "Per-gene t-statistics", panel = function(...) { require(grid, quietly = TRUE) grid.rect(y = unit(2, "native"), height = unit(1, "native"), gp = gpar(fill = "lightgrey", col = "transparent")) panel.grid(v = -1, h = 0) panel.stripplot(...) panel.average(..., fun = mean, lwd = 3) }) ################################################### ### chunk number 18: gseaStripplot ################################################### plot(trellis.last.object()) ################################################### ### chunk number 19: LMtestSetup ################################################### params <- new("ChrMapLinearMParams", conditional = FALSE, testDirection = "up", universeGeneIds = entrezUniverse, geneStats = tstats, annotation = "hgu95av2", pvalueCutoff = 0.01, minSize = 4L) paramsCond <- params paramsCond@conditional <- TRUE ################################################### ### chunk number 20: LMtest ################################################### lmans <- linearMTest(params) lmansCond <- linearMTest(paramsCond) chrSortOrder(summary(lmans)) chrSortOrder(summary(lmansCond)) ## ################################################### ### chunk number 21: ################################################### tmpSet <- subsetByBand(nsFiltered, ct, "1p36.2") esetBWPlot(tmpSet, ylab="1p36.2", layout = c(2, 8), par.strip.text = list(cex = 0.8)) ################################################### ### chunk number 22: dotplot1p362 ################################################### plot(trellis.last.object())