## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------- library(TFEA.ChIP) data("hypoxia_DESeq","hypoxia",package="TFEA.ChIP") # load example datasets hypoxia_table<-preprocessInputData(hypoxia_DESeq) head(hypoxia_table) head(hypoxia) hypoxia_table<-preprocessInputData(hypoxia) head(hypoxia_table) ## ----eval=TRUE,echo=TRUE------------------------------------------------- #extract vector with names of upregulated genes Genes.Upreg <- Select_genes(hypoxia_table,min_LFC = 1) #extract vector with names of non-responsive genes Genes.Control <- Select_genes(hypoxia_table, min_pval = 0.5, max_pval = 1, min_LFC = -0.25, max_LFC = 0.25) ## ----eval=TRUE,echo=TRUE,message=FALSE----------------------------------- #Conversion of hgnc to ENTREZ IDs GeneID2entrez(gene.IDs = c("EGLN3","NFYA","ALS2","MYC","ARNT")) # To translate from mouse IDs: # GeneID2entrez(gene.IDs = c("Hmmr", "Tlx3", "Cpeb4"), from.Mouse = TRUE ) ## ----eval=TRUE,echo=TRUE------------------------------------------------- CM_list_UP <- contingency_matrix(Genes.Upreg,Genes.Control) #generates list of contingency tables, one per dataset pval_mat_UP <- getCMstats(CM_list_UP) #generates list of p-values and OR from association test head(pval_mat_UP) ## ----eval=TRUE,echo=TRUE------------------------------------------------- chip_index<-get_chip_index(TFfilter = c("HIF1A","EPAS1","ARNT")) #restrict the analysis to datasets assaying these factors chip_index<-get_chip_index(encodeFilter = TRUE) # Or select ENCODE datasets only CM_list_UPe <- contingency_matrix(Genes.Upreg,Genes.Control,chip_index) #generates list of contingency tables pval_mat_UPe <- getCMstats(CM_list_UPe,chip_index) #generates list of p-values and ORs head(pval_mat_UPe) ## ----eval=FALSE,echo=TRUE------------------------------------------------ # plot_CM(pval_mat_UP) #plot p-values against ORs ## ----eval=FALSE,echo=TRUE------------------------------------------------ # HIFs<-c("EPAS1","HIF1A","ARNT") # names(HIFs)<-c("EPAS1","HIF1A","ARNT") # col<-c("red","blue","green") # plot_CM(pval_mat_UP,specialTF = HIFs,TF_colors = col) #plot p-values against ORs highlighting indicated TFs ## ----eval=TRUE,echo=TRUE------------------------------------------------- chip_index<-get_chip_index(TFfilter = c("HIF1A","EPAS1","ARNT")) #restrict the analysis to datasets assaying these factors ## ----eval=TRUE,echo=TRUE,results='hide'---------------------------------- GSEA.result <- GSEA_run(hypoxia_table$Genes, hypoxia_table$log2FoldChange, chip_index, get.RES = TRUE) #run GSEA analysis ## ----eval=TRUE,echo=TRUE------------------------------------------------- head(GSEA.result[["Enrichment.table"]]) head(GSEA.result[["RES"]][["GSM2390642"]]) head(GSEA.result[["indicators"]][["GSM2390642"]]) ## ----eval=FALSE,echo=TRUE------------------------------------------------ # TF.hightlight<-c("EPAS1","ARNT") # names(TF.hightlight)<-c("EPAS1","ARNT") # col<- c("red","blue") # plot_ES(GSEA.result,LFC = hypoxia_table$log2FoldChange,specialTF = TF.hightlight,TF_colors = col) ## ----eval=FALSE, echo=TRUE----------------------------------------------- # plot_RES(GSEA_result = GSEA.result,LFC = hypoxia_table$log2FoldChange,TF=c("EPAS1"),Accession=c("GSM2390642","GSM1642766")) ## ----eval=FALSE,echo=TRUE------------------------------------------------ # folder<-"~/peak.files.folder" # File.list<-dir(folder) # format<-"macs" # # gr.list <- lapply( # seq_along(File.list), # function( File.list, myMetaData, format ){ # # tmp<-read.table( File.list[i], ..., stringsAsFactors = FALSE ) # # file.metadata <- myMetaData[ myMetaData$Name == File.list[i], ] # # ChIP.dataset.gr<-txt2GR(tmp, format, file.metadata) # # return(ChIP.dataset.gr) # }, # File.list = File.list, # myMetadata = myMetadata, # format = format # ) ## ----eval=TRUE,echo=TRUE------------------------------------------------- # As an example of the output data("ARNT.peaks.bed","ARNT.metadata",package = "TFEA.ChIP") # Loading example datasets for this function ARNT.gr<-txt2GR(ARNT.peaks.bed,"macs1.4",ARNT.metadata) head(ARNT.gr,n=2) ## ----eval=FALSE,echo=TRUE------------------------------------------------ # dnaseClusters<-read.table(file="~/path.to.file.txt", # header = TRUE,sep="\t",stringsAsFactors = FALSE) # dnaseClusters<-makeGRangesFromDataFrame(dnaseClusters, # ignore.strand=TRUE, # seqnames.field="chrom", # start.field="chromStart", # end.field="chromEnd") ## ----eval=FALSE,echo=TRUE------------------------------------------------ # txdb<-TxDb.Hsapiens.UCSC.hg19.knownGene # Genes<-genes(txdb) # # near.gene<-findOverlaps(dnaseClusters, Genes, maxgap = 1000 ) # # dnase.sites.list<-queryHits(near.gene) # near.gene<-subjectHits(near.gene) # # gene_ids <- Genes[ near.gene ]$gene_id # DHS.database <- dnaseClusters[ dnase.sites.list ] # mcols(DHS.database)$gene_id <- gene_ids # ## ----eval=TRUE,echo=TRUE------------------------------------------------- data("DnaseHS_db","gr.list", package="TFEA.ChIP") # Loading example datasets for this function TF.gene.binding.db<-GR2tfbs_db( DnaseHS_db, gr.list ) str(TF.gene.binding.db) ## ----eval=TRUE,echo=TRUE------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) data("gr.list", package="TFEA.ChIP") # Loading example datasets for this function txdb<-TxDb.Hsapiens.UCSC.hg19.knownGene Genes<-genes(txdb) TF.gene.binding.db<-GR2tfbs_db( Genes, gr.list, distanceMargin = 0 ) str(TF.gene.binding.db) ## ----eval=TRUE,echo=TRUE------------------------------------------------- data("tfbs.database",package = "TFEA.ChIP") # Loading example datasets for this function txdb<-TxDb.Hsapiens.UCSC.hg19.knownGene gen.list<-genes(txdb)$gene_id # selecting all the genes in knownGene myTFBSmatrix<-makeTFBSmatrix(gen.list,tfbs.database) myTFBSmatrix[2530:2533,1:3] # The gene HMGN4 (Entrez ID 10473) has TFBS for this three ChIP-Seq datasets ## ----eval=FALSE,echo=TRUE------------------------------------------------ # set_user_data(binary_matrix = myTFBSmatrix, metadata = myMetaData)