## download code
## Chunk 1
# Load the hgu95av2 annotation data and flatten hgu95av2CHR into one named
# vector (presumably probe ID -> chromosome label; confirm against the
# package documentation).
library("hgu95av2")
chrVec = unlist(as.list(hgu95av2CHR))
# Count how often each value occurs, then check the class of the vector.
table(chrVec)
class(chrVec)
#names(chrVec)[1:10]
## Chunk 2
# Group the names of chrVec by their value and report the size of each group.
byChr = split(names(chrVec), chrVec)
sapply(byChr, length)
## Chunk 3
# Names grouped under the value "Y".
byChr[["Y"]]
## Chunk 4
library("hgu95av2")
# Look up the hgu95av2MAP entry stored under the key "1001_at".
hgu95av2MAP$"1001_at"
## Chunk 5
# For every entry of hgu95av2MAP keep only the values that start with "17p",
# then flatten the per-entry results and count the matches.
myPos = eapply(hgu95av2MAP, function(x) grep("^17p", x, value=TRUE))
myPos = unlist(myPos)
length(myPos)
## Chunk 6
myFindMap = function(mapEnv, which) {
  # Turn `which` into a search pattern with ppc() (defined outside this
  # block), then keep, for each entry of the environment mapEnv, only the
  # values that match the pattern. The per-entry results are flattened
  # into a single vector.
  pattern <- ppc(which)
  keepMatches <- function(entry) grep(pattern, entry, value = TRUE)
  unlist(eapply(mapEnv, keepMatches))
}
## Chunk 7
# Two 3 x 2 integer matrices whose row names and column names do not overlap.
x <- matrix(1:6, ncol = 2,
            dimnames = list(letters[1:3], LETTERS[1:2]))
y <- matrix(21:26, ncol = 2,
            dimnames = list(letters[6:8], LETTERS[3:4]))
# Bind them side by side, then one on top of the other.
cbind(x, y)
rbind(x, y)
## Chunk 8
# stack() turns the named list into a data frame of values plus an indicator
# column; unsplit() with that indicator puts the list components back into
# one vector.
s1 <- list(a = 1:3, b = 11:12, c = letters[1:6])
ss <- stack(s1)
ss
unsplit(s1, ss$ind)
## Chunk 9
# Materialise hgu95av2MAP as a list (mapP is reassigned in Chunk 12).
mapP = as.list(hgu95av2MAP)
# Number of values stored under each key of hgu95av2MAP.
mLens = unlist(eapply(hgu95av2MAP, length))
## Chunk 10
# Distribution of those per-key lengths.
mlt = table(mLens)
mlt
## Chunk 11
# Pick the keys whose entry has exactly three values and show the SYMBOL and
# MAP entries of the first such key.
len3 = mLens[mLens==3]
hgu95av2SYMBOL[[names(len3)[1]]]
hgu95av2MAP[[names(len3)[1]]]
## Chunk 12
# Keys whose MAP entry has exactly two values, and the ENTREZID entry of each.
len2 = names(mLens[mLens==2])
len2EG = unlist(mget(len2, hgu95av2ENTREZID))
# Compute the de-duplication mask once, before either vector is shortened,
# and apply the same mask to both. Recomputing the mask from the already
# de-duplicated len2EG gives an all-TRUE vector that is recycled over len2,
# so len2 would keep every key.
# NOTE(review): assumes each key has exactly one ENTREZID value, so that
# len2EG and len2 are aligned element by element -- confirm.
keep = !duplicated(len2EG)
len2EG = len2EG[keep]
len2 = len2[keep]
mapP = mget(len2, hgu95av2MAP)
# TRUE when exactly one value of the entry starts with "X" (resp. "Y").
# vapply() guarantees a logical vector even if mapP is empty.
hasX = vapply(mapP, function(x) sum(grepl("^X", x)) == 1, logical(1))
hasY = vapply(mapP, function(x) sum(grepl("^Y", x)) == 1, logical(1))
# Entries with one value on X and the other on Y.
table(hasX & hasY)
## Chunk 13
# Flag each hgu95av2MAP entry that contains at least one NA and tabulate
# the flags.
missingMap = unlist(eapply(hgu95av2MAP, 
    function(x) any(is.na(x))))
table(missingMap)
## Chunk 14
# Take the first value of each mapP entry, drop the NAs, group the entry
# names by that value, and tabulate the group sizes.
mapPs = sapply(mapP, function(x) x[1])
mapPs = mapPs[!is.na(mapPs)]
mapByPos = split(names(mapPs), mapPs)
table(sapply(mapByPos, length))
## Chunk 15
library("RSQLite")
# Open an SQLite connection with dbname "test" (a relative name, so
# presumably a file in the working directory) and copy the USArrests data
# frame into a table of the same name, replacing any existing table.
m = dbDriver("SQLite")
con = dbConnect(m, dbname="test")
data(USArrests)
dbWriteTable(con, "USArrests", USArrests, overwrite = TRUE)
dbListTables(con)
## Chunk 16
# Send a query and read the result in two steps: first five rows, then
# (n = -1) the remaining rows. Completion status is checked after each
# fetch and the result set is cleared at the end.
rs = dbSendQuery(con, "select * from USArrests")
d1 = fetch(rs, n = 5)
d1
dbHasCompleted(rs)
dbListResults(con)
d2 = fetch(rs, n = -1)
dbHasCompleted(rs)
dbClearResult(rs)
## Chunk 17
# Tables available on the connection and the fields of USArrests.
dbListTables(con)
dbListFields(con, "USArrests")
## Chunk 18
# List the tables recorded in sqlite_master, ordered by name.
query = paste("SELECT name FROM sqlite_master WHERE",
    "type='table' ORDER BY name;")
rs = dbSendQuery(con, query)
fetch(rs, n= -1)
# Release the result set before sending another query on this connection.
invisible(dbClearResult(rs))
## Chunk 19
# Send a filtered query; its rows are not fetched in this chunk.
rs = dbSendQuery(con, 
    "SELECT * FROM USArrests WHERE Murder > 10")
# Clear the pending result set so it is not left open on the connection.
invisible(dbClearResult(rs))
## Chunk 20
# Close the connection before deleting the database file it refers to.
invisible(dbDisconnect(con))
unlink("test")
## Chunk 21
 # Connect to the example SQLite database shipped in RBioinf's extdata.
 # NOTE(review): system.file() returns "" when the file is not found, in
 # which case dbConnect() would not open the intended database -- confirm
 # RBioinf is installed.
 library("RSQLite")
 m = dbDriver("SQLite")
 ##open up our test db    
 testDB = system.file("extdata/hgu95av2-sqlite.db", package="RBioinf")
 con = dbConnect(m, dbname = testDB)
 # List the tables and show the fields of the second one.
 tabs = dbListTables(con)
 tabs 
 dbListFields(con, tabs[2])
## Chunk 22
 # Join go_ont with go_ont_name on the ont column, fetch the first three
 # rows of the result, and clear the result set.
 query = paste("SELECT go_ont.go_id, go_ont.ont,", 
     "go_ont_name.ont_name FROM go_ont,",
     "go_ont_name WHERE (go_ont.ont = go_ont_name.ont)")
 rs = dbSendQuery(con, query)
 f3 = fetch(rs, n=3)
 f3
 dbClearResult(rs)
## (solution chunk)
## Chunk 24
# Self-join go_probe to find affy_ids that carry GO:0005737 with evidence
# 'IDA' in one row (g1) and evidence 'ISS' in another row (g2).
query = paste("SELECT g1.*, g2.evi FROM go_probe g1,",
    "go_probe g2 WHERE  (g1.go_id = 'GO:0005737' ",
    "AND g2.go_id = 'GO:0005737') ",
    "AND (g1.affy_id = g2.affy_id) ",
    "AND (g1.evi = 'IDA' AND g2.evi = 'ISS')")
 rs = dbSendQuery(con, query)
 fetch(rs)
 # Release the result set, then the connection: `con` is not used again
 # below (later chunks work with `mycon`).
 invisible(dbClearResult(rs))
 invisible(dbDisconnect(con))
## Chunk 25
library("hgu95av2.db")
# Connection object returned by hgu95av2_dbconn(); used by the later chunks
# that query the package's database directly.
mycon = hgu95av2_dbconn()
## Chunk 26
# Inspect hgu95av2GO: its column names, the first ten rows of its table
# form, and the first ten left and right keys.
colnames(hgu95av2GO)
toTable(hgu95av2GO)[1:10,]
Lkeys(hgu95av2GO)[1:10]
Rkeys(hgu95av2GO)[1:10]
## Chunk 27
# First ten rows returned by links() for the same object.
links(hgu95av2GO)[1:10,]
## Chunk 28
# Check that hgu95av2SYMBOL is a "Bimap", reverse it, and look up "ABL1"
# in the reversed map.
is(hgu95av2SYMBOL, "Bimap")
rmMAP = revmap(hgu95av2SYMBOL)
rmMAP$"ABL1"
## Chunk 29
 # revmap() applied to a plain named list.
 myl=list(a="w", b="x", c="y")
 revmap(myl)
## Chunk 30
# Join the elements of x with "', '" for use inside a single-quoted SQL
# IN ('...') list. Embedded single quotes are doubled (the SQL escape for a
# quote inside a string literal) so a value such as "O'X" cannot terminate
# the literal early. The caller supplies the opening "('" and closing "'".
sqlInItems = function(x) {
  paste(gsub("'", "''", x, fixed=TRUE), collapse="', '")
}

# Build the SQL statement selecting _id and alias_symbol from the alias
# table for the alias symbols in x.
#   x: vector of alias symbols.
# Returns a single character string.
queryAlias = function(x) {
  paste0("select _id, alias_symbol from alias ",
         "where alias_symbol in ('", sqlInItems(x), "' );")
}

# Build the SQL statement selecting _id and symbol from the gene_info table
# for the symbols in x.
#   x: vector of gene symbols.
# Returns a single character string.
queryGeneinfo = function(x) {
  paste0("select _id, symbol from gene_info where ",
         "symbol in ('", sqlInItems(x), "' );")
}

# Build the SQL statement selecting all columns of the genes table for the
# _id values in x. The ids are emitted as quoted literals.
#   x: vector of _id values.
# Returns a single character string.
queryGenes = function(x) {
  paste0("select * from genes where _id in ('", sqlInItems(x), "' );")
}
# Look up the `genes` rows for a set of gene symbols.
#
#   dbcon:   DBI connection to a database that has the gene_info, alias and
#            genes tables referenced by queryGeneinfo(), queryAlias() and
#            queryGenes().
#   symbols: character vector of symbols to look up.
#
# Symbols are matched against gene_info first. Any symbol not found there is
# looked up in the alias table. The resulting (_id, symbol) rows are then
# merged with the matching rows of `genes`.
# Returns the merged data frame.
findEGs = function(dbcon, symbols) {
  # dbGetQuery() sends the statement, fetches all rows and clears the result
  # set, so no result set is left open when the next query is sent.
  a1 = dbGetQuery(dbcon, queryGeneinfo(symbols))
  stillLeft = setdiff(symbols, a1[,2])
  if( length(stillLeft)>0 ) {
    a2 = dbGetQuery(dbcon, queryAlias(stillLeft))
    # Give the alias rows the gene_info column names so rbind() can stack them.
    names(a2) = names(a1)
    a1 = rbind(a1, a2)
  }

  merge(a1, dbGetQuery(dbcon, queryGenes(a1[,1])))
}
## Chunk 31
# Look up four symbols through the package's database connection.
findEGs(mycon, c("ALL1", "AF4", "BCR", "ABL"))
## Chunk 32
# Locate GO.sqlite in the GO.db package and attach it to the existing
# connection under the schema name "go".
GOdbloc = system.file("extdata", "GO.sqlite", package="GO.db")
attachSql = paste("ATTACH '", GOdbloc, "' as go;", sep = "")
dbGetQuery(mycon, attachSql)
## Chunk 33
# Join go_bp_all (from the connection's own database) with go_term from the
# attached "go" database on go_id; return at most 10 distinct rows.
sql = paste("SELECT DISTINCT a.go_id AS 'hgu95av2.go_id',",
            "a._id AS 'hgu95av2._id',",
            "g.go_id AS 'GO.go_id', g._id AS 'GO._id',",
            "g.ontology",
            "FROM go_bp_all AS a, go.go_term AS g", 
            "WHERE a.go_id = g.go_id LIMIT 10;")
dataOut = dbGetQuery(mycon, sql)
dataOut
## Chunk 34
## schema = capture.output(hgu95av2_dbschema())
## head(schema, 18)
## Chunk 35
# Capture the text printed by hgu95av2() and show its first 20 lines.
qcdata = capture.output(hgu95av2())
head(qcdata, 20)
## Chunk 36
## hgu95av2MAPCOUNTS
## hgu95av2_dbInfo()
## Chunk 37
# Load human.db0; if loading fails, source the biocLite installer script,
# install the package, and try loading it again.
# NOTE(review): this executes remote code fetched over plain http, and
# biocLite.R appears to have been superseded by BiocManager in current
# Bioconductor releases -- confirm before relying on this fallback.
tryCatch(library("human.db0"), error=function(e) {
      source("http://bioconductor.org/biocLite.R")
      biocLite("human.db0")
      library("human.db0") } )
## Chunk 38
##   source("http://bioconductor.org/biocLite.R")
##   biocLite("human.db0")
## Chunk 39
# Path to the hgu95av2_ID file in AnnotationDbi's extdata directory
# (system.file() returns "" if the file or package is not found).
hgu95av2_IDs = system.file("extdata", 
                           "hgu95av2_ID", 
                           package="AnnotationDbi")
#Then specify some of the metadata for my database
# The manufacturer URL now includes the "//" after the scheme, matching the
# URL passed to makeHUMANCHIP_DB() in Chunk 42.
myMeta = c("DBSCHEMA" = "HUMANCHIP_DB",
    "ORGANISM" = "Homo sapiens",
    "SPECIES" = "Human",
    "MANUFACTURER" = "Affymetrix",
    "CHIPNAME" = "Affymetrix Human Genome U95 Set Version 2",
    "MANUFACTURERURL" = "http://www.affymetrix.com")
## Chunk 40
# All output of the following steps goes to the session's temporary directory.
tmpout = tempdir()
# Populate a database with prefix "hgu95av2Test" from the ID file and the
# metadata vector defined in Chunk 39.
# NOTE(review): baseMapType = "gb" presumably declares the IDs to be GenBank
# accessions -- confirm against the popHUMANCHIPDB documentation.
popHUMANCHIPDB(affy = FALSE, prefix = "hgu95av2Test",
    fileName = hgu95av2_IDs, metaDataSrc = myMeta,
    baseMapType = "gb", outputDir = tmpout,
    printSchema = TRUE)
## Chunk 41
# Describe the annotation package to build, then build it into tmpout from
# hgu95av2Test.sqlite (expected to have been written there by the previous
# step).
seed <- new("AnnDbPkgSeed",
            Package = "hgu95av2Test.db",
            Version = "1.0.0",
            PkgTemplate = "HUMANCHIP.DB",
            AnnObjPrefix = "hgu95av2Test")
makeAnnDbPkg(seed, 
             file.path(tmpout, "hgu95av2Test.sqlite"),
             dest_dir = tmpout)
## Chunk 42
# Single call to makeHUMANCHIP_DB() with the same ID file, passing the chip
# metadata as individual arguments instead of a metadata vector.
makeHUMANCHIP_DB(affy=FALSE,
    prefix="hgu95av2",
    fileName=hgu95av2_IDs,
    baseMapType="gb",
    outputDir = tmpout,
    version="2.1.0",
    manufacturer = "Affymetrix",
    chipName = "Affymetrix Human Genome U95 Set Version 2",
    manufacturerUrl = "http://www.affymetrix.com")
## Chunk 43
# Path to the example XML file in RBioinf's extdata directory.
Yeastfn = system.file("extdata", "yeast_small-01.xml", package="RBioinf")
## Chunk 44
# NOTE(review): no library() call for the XML package is visible in this
# file before its functions are used -- confirm it is attached elsewhere.
# Parse the file into an R-level tree and read the namespace information
# from the root node.
yeastIntAct = xmlTreeParse(Yeastfn)
nsY = xmlNamespaceDefinitions(xmlRoot(yeastIntAct))
ns = getDefaultNamespace(xmlRoot(yeastIntAct))
# Bind the default namespace to the prefix "ns"; the XPath queries in later
# chunks use this prefix.
namespaces = c(ns = ns)
## (solution chunk)
## Chunk 46
# Handler that ignores its input and returns NULL.
nullf = function(x, ...) NULL
# Re-parse the file with nullf installed as the handler for the sequence,
# organism, primaryRef, secondaryRef and names nodes (presumably so those
# nodes are discarded from the resulting tree -- confirm in the XML docs).
yeast2 = xmlTreeParse(Yeastfn, 
    handlers = list(sequence = nullf,
    organism = nullf, primaryRef = nullf, 
    secondaryRef = nullf,
    names = nullf), asTree=TRUE)
## Chunk 47
# Compare the memory footprint of the two parses.
object.size(yeastIntAct)
object.size(yeast2)
## Chunk 48
# Parse with internal nodes and count the attributeList elements found
# anywhere in the document.
yeast3 = xmlTreeParse(Yeastfn, useInternalNodes=TRUE)
f1 = getNodeSet(yeast3, "//ns:attributeList", namespaces)
length(f1)
## Chunk 49
# Text of the fullName elements nested under interactionDetectionMethod and
# under hostOrganism.
iaM = getNodeSet(yeast3, 
    "//ns:interactionDetectionMethod//ns:fullName", 
    namespaces)
sapply(iaM, xmlValue)
f4 = getNodeSet(yeast3, "//ns:hostOrganism//ns:fullName", 
    namespaces)
sapply(f4, xmlValue)
## Chunk 50
# Count the interactor elements under interactorList and the interaction
# elements that are direct children of interactionList.
interactors = getNodeSet(yeast3, 
   "//ns:interactorList//ns:interactor",
   namespaces)
length(interactors)
interactions = getNodeSet(yeast3, 
   "//ns:interactionList/ns:interaction",
   namespaces)
length(interactions)
## Chunk 51
 # Same interactor query, applying xmlValue to each matched node directly;
 # this overwrites the `interactors` node set from Chunk 50.
 interactors = xpathApply(yeast3,
     "//ns:interactorList//ns:interactor", 
     xmlValue, namespaces = namespaces)
## Chunk 52
# Private environment holding the state written by entSH.
e2 <- new.env()
assign("level", NULL, envir = e2)
assign("minorVersion", NULL, envir = e2)

# Element handler: prints "Starting <name>" and stores the "level" and
# "minorVersion" entries of attrs in the handler's own environment (e2),
# where they can be read back after parsing.
entSH <- function(name, attrs, ...) {
  cat("Starting", name, "\n")
  level <<- attrs["level"]
  minorVersion <<- attrs["minorVersion"]
}
# Rebind the handler's enclosing environment so `<<-` targets e2.
environment(entSH) <- e2
## Chunk 53
# Environment shared by hOrg and hInt; holds the collected taxonomy ids and
# the running count.
e3 <- new.env()
assign("taxid", NULL, envir = e3)
assign("numInt", 0, envir = e3)

# Element handler: prepends the "ncbiTaxId" entry of attrs to taxid in e3.
hOrg <- function(name, attrs, ...) {
  taxid <<- c(attrs["ncbiTaxId"], taxid)
}
environment(hOrg) <- e3

# Element handler: adds one to numInt in e3 on every call.
hInt <- function(name, attrs, ...) {
  numInt <<- numInt + 1
}
environment(hInt) <- e3
## Chunk 54
# Event-parse the file with the three handlers, then read back the state
# each handler accumulated in its environment. (s1 is reused here; it held
# a list in Chunk 8.)
s1 = xmlEventParse(Yeastfn,
handlers = list(entrySet = entSH, hostOrganism = hOrg,
         interactor = hInt))
environment(s1$entrySet)$level
environment(s1$hostOrganism)$taxid
environment(s1$interactor)$numInt
## Chunk 55
# Build the URL of a build-results page and parse the HTML into an
# internal-node document.
# Note: the variable `url` shares its name with the base function url(),
# which Chunk 59 calls.
url = paste("http://www.bioconductor.org/checkResults/", 
    "2.1/bioc-LATEST/", sep="")
s1 = htmlTreeParse(url, useInternalNodes=TRUE)
class(s1)
## Chunk 56
# All <a> elements that carry an href attribute.
f1 = getNodeSet(s1, "//a[@href]")
length(f1)
## Chunk 57
# <a href> elements that are direct children of a <b> element; show the
# text of the first ten.
f2 = getNodeSet(s1, "//b/a[@href]")
p2 = sapply(f2, xmlValue)
length(p2)
p2[1:10]
## Chunk 58
# Collect the href values and find the positions of those containing the
# package-page path.
pkgs = sapply(f1, xmlGetAttr, "href")
pkg = grep("/packages/2.1/bioc/html/", pkgs, fixed=TRUE)
## Chunk 59
 ezURL = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
 t1 = url(ezURL, open="r")
 if( isOpen(t1) ) {
    z = xmlTreeParse(paste(ezURL, "einfo.fcgi", sep=""),
     isURL=TRUE, handlers=NULL, asTree=TRUE) 
    dbL = xmlChildren(z[[1]]$children$eInfoResult)$DbList
    dbNames = xmlSApply(dbL, xmlValue)
    
    length(dbNames)
    dbNames[1:5]
} 
## Chunk 60
# NOTE(review): several helpers used in Chunks 63-65 (filterSummary,
# attributeSummary, getGene, the `group` argument) may not exist in current
# biomaRt releases -- confirm against the installed version.
library("biomaRt")
# Show the first few available marts.
head(listMarts())
## Chunk 61
# Select the "ensembl" mart, look at the first few datasets it offers, and
# pick the hsapiens_gene_ensembl dataset.
ensM = useMart("ensembl")
ensData = head(listDatasets(ensM))
dim(ensData)
ensMH = useDataset("hsapiens_gene_ensembl", mart=ensM)
## Chunk 62
## ensMH = useMart("ensembl", 
##     dataset = "hsapiens_gene_ensembl")
## Chunk 63
# Summarise the filters of the dataset and list those in the "GENE:" group.
filterSummary(ensMH)
lfilt = listFilters(ensMH, group="GENE:")
nrow(lfilt)
head(lfilt)
## Chunk 64
# Summarise the attributes and list those in the "PROTEIN:" group.
head(attributeSummary(ensMH))
lattr = listAttributes(ensMH, group="PROTEIN:")
lattr
## Chunk 65
# Look up three ids passed with type "entrezgene" and show the distinct
# hgnc_symbol values returned.
entrezID = c("983", "3581", "1017") 
rval = getGene(id=entrezID, type="entrezgene", mart = ensMH) 
unique(rval$hgnc_symbol) 
## Chunk 66
# Open mart and dataset in one call, then fetch the entrezgene, interpro and
# interpro_description attributes for the same ids, filtering on entrezgene.
ensembl = useMart("ensembl", 
    dataset = "hsapiens_gene_ensembl")
    ipro = getBM(attributes=c("entrezgene","interpro",
    "interpro_description"), 
filters = "entrezgene", values = entrezID, 
    mart = ensembl) 
ipro
## Chunk 67
 # Fetch the record "GDS10" with getGEO() and convert it with GDS2eSet(),
 # requesting do.log2 = TRUE.
 library(GEOquery) 
 gds = getGEO("GDS10") 
 eset = GDS2eSet(gds, do.log2 = TRUE) 
## Chunk 68
## s1 = experimentData(eset)
## abstract(s1)
## s1@pubMedIds
## Chunk 69
# Look up the name stored for pathway id "00740" in the local KEGG data,
# and ask the remote service for the genes of "path:sce00740".
library("KEGG")
library("KEGGSOAP")
KEGGPATHID2NAME$"00740"
SoapAns = get.genes.by.pathway("path:sce00740")
SoapAns
## Chunk 70
# Strip the leading "sce:" from the remote answer and list the ids that the
# local KEGGPATHID2EXTID entry for "sce00740" does not contain.
SA = gsub("^sce:", "", SoapAns)
localAns = KEGGPATHID2EXTID$"sce00740"
setdiff(SA, localAns)