## R code extracted from a knitr document; the `## ----label----` lines are
## the original chunk headers. Chunks marked eval=FALSE are kept commented out.

## ----style, echo = FALSE, results = 'asis'---------------------------------
# Chunk evaluation and caching can be switched off via environment variables.
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")))

## ----file.choose, eval=FALSE-----------------------------------------------
# path <- file.choose()

## ----file.path, echo=FALSE-------------------------------------------------
path <- system.file(package="BiocIntro", "extdata", "BRFSS-subset.csv")

## ----read.csv--------------------------------------------------------------
# system.file() returns "" when the file is not found, and read.csv("")
# would then read from stdin; fail early instead.
stopifnot(file.exists(path))
brfss <- read.csv(path)

## ----brfss-sex-------------------------------------------------------------
table(brfss$Sex)
table(brfss$Year)
table(brfss$Sex, brfss$Year)
with(brfss, table(Sex, Year))           # same, but easier

## ----brfss-aggregate-------------------------------------------------------
with(brfss, aggregate(Weight, list(Year, Sex), mean, na.rm=TRUE))
with(brfss, aggregate(Weight, list(Year=Year, Sex=Sex), mean, na.rm=TRUE))

## ----brfss-aggregate-formula-----------------------------------------------
aggregate(Weight ~ Year + Sex, brfss, mean)  # same, but more informative
aggregate(. ~ Year + Sex, brfss, mean)       # all variables

## ----t-test-1990-----------------------------------------------------------
brfss_1990 <- brfss[brfss$Year == 1990,]
t.test(Weight ~ Sex, brfss_1990)
t.test(Weight ~ Sex, brfss, subset = Year == 1990)

## ----brfss-boxplot, fig.width=5, fig.height=5------------------------------
boxplot(Weight ~ Year, brfss, subset = Sex == "Male", main="Males")

## ----brfss-hist, fig.width=5, fig.height=5---------------------------------
hist(
    brfss_1990[brfss_1990$Sex == "Female", "Weight"],
    main="Females, 1990", xlab="Weight"
)

## ----ALL-choose, eval=FALSE------------------------------------------------
# path <- file.choose()    # look for ALL-phenoData.csv

## ----echo=FALSE------------------------------------------------------------
path <- system.file(package="BiocIntro", "extdata", "ALL-phenoData.csv")

## ----ALL-input-------------------------------------------------------------
stopifnot(file.exists(path))
# stringsAsFactors=TRUE is required on R >= 4.0 (where the default became
# FALSE): the summary(), droplevels() and levels() calls below operate on
# factor columns and fail or do nothing on plain character vectors.
pdata <- read.csv(path, row.names=1, stringsAsFactors=TRUE)

## ----ALL-properties--------------------------------------------------------
class(pdata)
colnames(pdata)
dim(pdata)
head(pdata)
summary(pdata$sex)
summary(pdata$cyto.normal)

## ----ALL-subset------------------------------------------------------------
pdata[1:5, 3:4]
pdata[1:5, ]
head(pdata[, 3:5])
tail(pdata[, 3:5], 3)
head(pdata$age)
head(pdata$sex)
head(pdata[pdata$age > 21,])

## ----ALL-subset-NA---------------------------------------------------------
idx <- pdata$sex == "F" & pdata$age > 40
table(idx, useNA="ifany")
dim(pdata[idx,])           # WARNING: 'NA' rows introduced
tail(pdata[idx,])
dim(subset(pdata, idx))    # BETTER: no NA rows
dim(subset(pdata, (sex == "F") & (age > 40)))  # alternative
tail(subset(pdata,idx))
## robust `[`: exclude NA values
dim(pdata[idx & !is.na(idx),])

## ----ALL-BCR/ABL-subset----------------------------------------------------
bcrabl <- subset(pdata, mol.biol %in% c("BCR/ABL", "NEG"))

## ----ALL-BCR/ABL-drop-unused-----------------------------------------------
# Remove the factor levels that no longer occur after subsetting.
bcrabl$mol.biol <- droplevels(bcrabl$mol.biol)

## ----ALL-BT----------------------------------------------------------------
levels(bcrabl$BT)

## ----ALL-BT-recode---------------------------------------------------------
table(bcrabl$BT)
# Collapse levels to their first character; duplicated levels are merged.
levels(bcrabl$BT) <- substring(levels(bcrabl$BT), 1, 1)
table(bcrabl$BT)

## ----ALL-BCR/ABL-BT--------------------------------------------------------
aggregate(rownames(bcrabl) ~ BT + mol.biol, bcrabl, length)

## ----ALL-aggregate---------------------------------------------------------
aggregate(age ~ mol.biol + sex, bcrabl, mean)

## ----ALL-age---------------------------------------------------------------
t.test(age ~ mol.biol, bcrabl)
boxplot(age ~ mol.biol, bcrabl)