## Introductory walkthrough in two parts:
##   1. BRFSS-subset.csv: read a CSV into a data.frame, handle missing values,
##      subset rows, and run a t-test and a linear regression.
##   2. ALL-expression.csv / ALL-phenoData.csv: read an expression matrix plus
##      its phenotype table and plot a 2-D multidimensional-scaling projection.
## The `## ----label, options----` lines look like knitr chunk markers; chunks
## marked eval=FALSE are kept as commented-out code for interactive use.

## ----style, echo = FALSE, results = 'asis'---------------------------------
# Chunk evaluation and caching can be toggled with the KNITR_EVAL and
# KNITR_CACHE environment variables; both default to "TRUE".
knitr::opts_chunk$set(
  eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
  cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))
)

## ----echo=FALSE------------------------------------------------------------
path <- system.file(package = "BiocIntro", "extdata", "BRFSS-subset.csv")

## ----ALL-choose, eval=FALSE------------------------------------------------
# path <- file.choose()    # look for BRFSS-subset.csv

## ----ALL-input-------------------------------------------------------------
stopifnot(file.exists(path))
brfss <- read.csv(path)

## --------------------------------------------------------------------------
names(brfss)
# Without na.rm = TRUE, median() returns NA if any Age value is missing.
median(brfss$Age)

## ----brfss-anyNA-----------------------------------------------------------
any(is.na(brfss$Age))
anyNA(brfss$Age)  # same, but more efficient

## --------------------------------------------------------------------------
median(brfss$Age, na.rm = TRUE)

## ---- eval=FALSE-----------------------------------------------------------
# plot(brfss$Age)
# plot(sort(brfss$Age))
# sortedAge <- sort(brfss$Age)
# ?plot

## --------------------------------------------------------------------------
summary(brfss)
# Treat Year as a grouping variable rather than a number, so it can be used
# on the right-hand side of the formulas below.
brfss$Year <- factor(brfss$Year)

## --------------------------------------------------------------------------
# NOTE: logical indexing with `[` yields all-NA rows wherever Sex is NA;
# subset() (used further below) drops such rows instead.
brfssFemale <- brfss[brfss$Sex == "Female", ]
summary(brfssFemale)

## --------------------------------------------------------------------------
plot(Weight ~ Year, brfssFemale)

## --------------------------------------------------------------------------
t.test(Weight ~ Year, brfssFemale)

## --------------------------------------------------------------------------
brfss2010Male <- subset(brfss, Year == 2010 & Sex == "Male")
summary(brfss2010Male)

## --------------------------------------------------------------------------
hist(brfss2010Male$Weight)
hist(brfss2010Male$Height)
plot(Weight ~ Height, brfss2010Male)

## --------------------------------------------------------------------------
fit <- lm(Weight ~ Height, brfss2010Male)
fit

## --------------------------------------------------------------------------
anova(fit)

## --------------------------------------------------------------------------
# Scatter plot with the fitted regression line and one highlighted point
# at (Height = 180, Weight = 88).
plot(Weight ~ Height, brfss2010Male)
abline(fit, col = "blue", lwd = 2)
points(180, 88, col = "red", cex = 4, pch = 20)

## ---- eval=FALSE-----------------------------------------------------------
# class(fit)                 # 'noun'
# methods(class=class(fit))  # 'verb'

## ---- eval=FALSE-----------------------------------------------------------
# plot(fit)
# ?plot.lm

## ----echo=FALSE------------------------------------------------------------
path <- system.file(package = "BiocIntro", "extdata", "ALL-expression.csv")

## ----ALL-choose-again, eval=FALSE------------------------------------------
# path <- file.choose()    # look for ALL-expression.csv
# stopifnot(file.exists(path))

## ----ALL-input-exprs-------------------------------------------------------
# system.file() returns "" when the file is not found; fail here with a clear
# message instead of inside read.csv().
stopifnot(file.exists(path))
# check.names = FALSE keeps the column headers exactly as written in the file.
exprs <- read.csv(path, row.names = 1, check.names = FALSE)
exprs <- as.matrix(exprs)
class(exprs)
dim(exprs)
exprs[1:6, 1:10]
range(exprs)

## ----echo=FALSE------------------------------------------------------------
path <- system.file(package = "BiocIntro", "extdata", "ALL-phenoData.csv")

## ----ALL-phenoData.csv-clustering-student, eval=FALSE----------------------
# path <- file.choose()    # look for ALL-phenoData.csv
# stopifnot(file.exists(path))

## --------------------------------------------------------------------------
# Same guard as for the other input files (see system.file() note above).
stopifnot(file.exists(path))
pdata <- read.csv(path, row.names = 1)
class(pdata)
dim(pdata)
head(pdata)

## ----colors----------------------------------------------------------------
library(RColorBrewer)  ## not available? install package via RStudio
highlight <- brewer.pal(3, "Set2")[1:2]

## ----ALL-BorT--------------------------------------------------------------
# Keep only the first character of BT as a factor
# (presumably "B" vs "T" -- confirm against the data).
pdata$BorT <- factor(substr(pdata$BT, 1, 1))

## --------------------------------------------------------------------------
# Transpose so that the rows of exprs line up with the rows of pdata;
# dist() below computes distances between rows.
exprs <- t(exprs)

## --------------------------------------------------------------------------
stopifnot(identical(rownames(pdata), rownames(exprs)))

## --------------------------------------------------------------------------
d <- dist(exprs)

## --------------------------------------------------------------------------
cmd <- cmdscale(d)

## --------------------------------------------------------------------------
# Indexing by a factor uses its integer codes, so each BorT level gets one
# of the two highlight colours.
plot(cmd, col = highlight[pdata$BorT])