###################################################
### chunk number 1: orgDemo
###################################################
## Load the mouse Entrez Gene annotation package.
library("org.Mm.eg.db")

## Look at what we just loaded. The package environment is named explicitly
## (instead of relying on search-path position 2) so the listing is still
## the right one if the package was already attached before this point.
ls("package:org.Mm.eg.db")

## Data for the org packages comes from the latest UCSC data
## which is from NCBI (UCSC calls it mm9, NCBI Build 37.1)

## Have a peek at the first four entries of the start-location map:
as.list(org.Mm.egCHRLOC)[1:4]

## Notice that for each Entrez Gene ID there is a start location on the
## UCSC genome:
##   negative values are on the minus strand
##   positive values are on the positive strand

## For the stop locations use:
as.list(org.Mm.egCHRLOCEND)[1:4]

## Or use get, mget etc. with the Entrez Gene IDs directly.
## ifnotfound = NA is passed so that an ID missing from the map is reported
## as NA rather than stopping the lookup.
EGs <- c("18392", "18414", "56513")
mget(EGs, org.Mm.egCHRLOC, ifnotfound = NA)
mget(EGs, org.Mm.egCHRLOCEND, ifnotfound = NA)

## You can also retrieve ENSEMBL IDs using this package
mget(EGs, org.Mm.egENSEMBL, ifnotfound = NA)


###################################################
### chunk number 2: biomaRtDemo
###################################################
## Getting the data from biomaRt:
library("biomaRt")

## Choose a database. head() is used rather than [1:5, ] so that no all-NA
## padding rows are printed when fewer than five marts are available.
head(listMarts(), 5)

## Get the current ensembl database.
ensembl <- useMart("ensembl")

## List the first few datasets therein
head(listDatasets(ensembl), 10)

## Then set up so that you use that for this session
## (we will choose the mouse one from NCBI build 37.1):
ensembl <- useDataset("mmusculus_gene_ensembl", mart = ensembl)

## List the first few attributes (the columns a query can return)
attributes <- listAttributes(ensembl)
head(attributes, 10)

## And the first few filters (the fields a query can be restricted by)
filters <- listFilters(ensembl)
head(filters, 10)

## Some Entrez Gene IDs
EGs <- c("18392", "18414", "56513")

## First, a simple example to just get some gene names.
## NOTE(review): attribute and filter names are defined by the mart being
## queried and can differ between Ensembl releases -- confirm that
## "external_gene_id" and "entrezgene" appear in the listAttributes() /
## listFilters() output above before relying on this query.
getBM(attributes = "external_gene_id",
      filters = "entrezgene",
      values = EGs,
      mart = ensembl)


###################################################
### chunk number 3: biomartDemoContinued
###################################################
## Look up transcript start and end positions for the same Entrez Gene IDs,
## returning the Entrez Gene ID alongside each pair of coordinates.
getBM(
  attributes = c(
    "entrezgene",
    "transcript_start",
    "transcript_end"
  ),
  filters = "entrezgene",
  values  = EGs,
  mart    = ensembl
)


###################################################
### chunk number 4: biomartDemoContinued2
###################################################
## Additionally, you can get exon boundaries.
## But first you have to find out what the attributes are called...
## NOTE(review): attributeSummary() may not be provided by every biomaRt
## release -- confirm it exists in the installed version.
attributeSummary(ensembl)  

## Let's zoom in on the exon/Structure attributes by restricting the listing
## to the "Structures" category and the "EXON:" group.
## NOTE(review): the category/group arguments and these label strings are
## assumed to match the installed biomaRt and the mart in use -- confirm.
listAttributes(ensembl, category = "Structures", group = "EXON:")


###################################################
### chunk number 5: biomartDemoContinued3 eval=FALSE
###################################################
## ##Find the exon starts and stops for "56513"
## getBM(attributes = c("ensembl_exon_id","exon_chrom_start","exon_chrom_end"), 
##       filters = "entrezgene", 
##       values = "56513", 
##       mart=ensembl)


###################################################
### chunk number 6: GO and BiomaRt Example
###################################################
## We can also search based on GO terms
library("GO.db")

## Show the GO.db record for the term of interest.
## NOTE(review): GO identifiers can be retired between GO.db releases --
## confirm this ID is still present in the installed version.
GOTERM[["GO:0016564"]]

## Here is what we have for Entrez Gene IDs affiliated with that term.
## unique() collapses genes that are listed more than once for the term.
GOEGs <- unique(org.Mm.egGO2EG[["GO:0016564"]])
GOEGs

## Then we can retrieve their transcript locations from biomaRt like this.
## Arguments are named to match the other getBM() calls in this script.
## The result is stored in geneLocs rather than printed.
geneLocs <- getBM(attributes = c("ensembl_gene_id", "transcript_start",
                                 "transcript_end", "chromosome_name"),
                  filters = "entrezgene",
                  values = GOEGs,
                  mart = ensembl)


###################################################
### chunk number 7: SessionInfo
###################################################
## Print the R version and the versions of the attached/loaded packages so
## the results above can be tied to a specific software environment.
sessionInfo()