## ----load-depenencies, echo=TRUE, message=FALSE, warning=FALSE---------------- library(sesame) library(SummarizedExperiment) library(stats4) library(sesameData) ## ----list-data, eval=TRUE, echo=TRUE------------------------------------------ sesameDataCache(keyword='KYCG') databaseSetNames = sesameData:::df_master$Title[grepl('KYCG', sesameData:::df_master$Title)] ## ----print-data, eval=TRUE, echo=TRUE----------------------------------------- head(databaseSetNames) ## ----cache-data, eval=TRUE, warning=FALSE------------------------------------- databaseSetNames = c('KYCG.MM285.seqContextN.20210630', 'KYCG.MM285.designGroup.20210210', 'KYCG.MM285.chromosome.mm10.20210630', 'KYCG.MM285.probeType.20210630') databaseSets = do.call(c, lapply(databaseSetNames, sesameDataGet)) ## ----view-data0, eval=TRUE, warning=FALSE------------------------------------- sprintf("length(databaseSets) = %s database sets", length(databaseSets)) ## ----view-data1, eval=TRUE, warning=FALSE------------------------------------- str(databaseSets[1:3]) ## ----view-data2, eval=TRUE, warning=FALSE------------------------------------- summary(databaseSets[1:3]) ## ----cache-data2, eval=TRUE, echo=TRUE---------------------------------------- MM285.tissueSignature = sesameDataGet('MM285.141.SE.tissueSignature') df = rowData(MM285.tissueSignature) querySet = df$Probe_ID[df$branch == "E-Brain"] ## ----view-data4, eval=TRUE, echo=FALSE---------------------------------------- cat(sprintf("length(querySet) = %s probes", length(querySet))) ## ----run-annotation, echo=TRUE, eval=TRUE------------------------------------- annotation = getDatabaseSetOverlap(querySet, databaseSets) head(annotation) ## ----echo = FALSE, results="asis"--------------------------------------------- library(knitr) df = data.frame( "Continuous DB"=c("Correlation","FGSEA"), "Discrete DB"=c("FGSEA","Fisher's Exact Test")) rownames(df) = c("Continuous Query", "Discrete Query") kable(df, caption="Four KnowYourCG Testing Scenarios") ## ----run-test-single, echo=TRUE, eval=TRUE------------------------------------ results = testEnrichment(querySet=querySet, databaseSets=databaseSets, verbose=FALSE) print(head(results)) ## ----plot-volcano, fig.width=7, fig.height=6, echo=TRUE, warning=FALSE-------- plotVolcano(data=results, title="Database Set Enrichment", subtitle="MM285 Mouse Platform") ## ----plot-lollipop, fig.width=7, fig.height=6, echo=TRUE---------------------- plotLollipop(data=results, title="Top Database Set Enrichment", subtitle="MM285 Mouse Platform") ## ----run-test-data, echo=TRUE, eval=TRUE-------------------------------------- KYCG.MM285.seqContextN.20210630 = sesameDataGet('KYCG.MM285.seqContextN.20210630') CpGDesity50 = KYCG.MM285.seqContextN.20210630['CpGDesity50'] distToTSS = KYCG.MM285.seqContextN.20210630['distToTSS'] ## ----run-test-other1, echo=TRUE, eval=TRUE------------------------------------ resultsCpGDensity = testEnrichment(querySet=querySet, databaseSets=CpGDesity50, platform="MM285") print(resultsCpGDensity) ## ----run-test-other2, echo=TRUE, eval=TRUE------------------------------------ resultsTSS = testEnrichment(querySet=querySet, databaseSets=distToTSS, platform="MM285") print(resultsTSS) ## ----run-test-other3, echo=TRUE, eval=TRUE------------------------------------ resultsCpGdensityTSS = testEnrichment(querySet=CpGDesity50[[1]], databaseSets=distToTSS, platform="MM285") print(resultsCpGdensityTSS) ## ----load-data-tfbs, echo=TRUE, eval=TRUE------------------------------------- databaseSets = sesameDataGet("KYCG.MM285.TFBS.20210817") ## ----run-test-tfbs, echo=TRUE, eval=TRUE, warning=FALSE----------------------- resultsTFBS = testEnrichment(querySet=querySet, databaseSets=databaseSets, platform="MM285", verbose=FALSE, return.meta=TRUE) head(resultsTFBS) ## ----plot-volcano-tfbs, fig.width=7, fig.height=6, echo=TRUE------------------ plotVolcano(data=resultsTFBS, title="Transcription Factor Binding Site Enrichment", subtitle='MM285 Mouse Platform', alpha=0.0005) ## ----plot-lollipop-tfbs, fig.width=7, fig.height=6, echo=TRUE----------------- plotLollipop(data=resultsTFBS, title="Transcription Factor Binding Site Enrichment", subtitle='MM285 Mouse Platform') ## ----run-test-gene, fig.width=7, fig.height=6, echo=TRUE, warning=FALSE------- resultsGene = testEnrichmentGene(querySet, platform="MM285", verbose=FALSE) head(resultsGene) ## ----plot-volcano-gene, fig.width=7, fig.height=6, echo=TRUE------------------ plotVolcano(data=resultsGene, title="Gene Enrichment", subtitle="MM285 Mouse Platform", n.fdr=TRUE) ## ----plot-lollipop-gene, fig.width=7, fig.height=6, echo=TRUE----------------- plotLollipop(data=resultsGene, title="Top Gene Enrichment", subtitle="MM285 Mouse Platform", n=10) ## ----run-feature-engineering-get-data, echo=TRUE, eval=TRUE------------------- se = sesameDataGet('MM285.20Kx467.SE') samplesheet = colData(se)[, c("Mouse_Age_Months", "Mouse_Age_Days", "Sex", "Strain_Corrected", "Tissue_Corrected", 'Genotype')] betas = assay(se) print(head(samplesheet)) ## ----run-feature-engineering-statistics--------------------------------------- databaseSets = do.call(c, lapply(databaseSetNames, sesameDataGet)) statistics = calcDatabaseSetStatisticsAll(betas, databaseSets=databaseSets) head(statistics[, 1:5]) ## ----feature-engineering-subsetting-statistics-------------------------------- statistics = statistics[, grepl("mean", colnames(statistics))] head(statistics[, 1:5]) ## ----feature-engineering-data-curation, warning=FALSE------------------------- data = cbind(data.frame(samplesheet), statistics) data$Sex = relevel(factor(data$Sex), 'Female') data$Strain_Corrected = relevel(factor(data$Strain_Corrected), '129/Sv') data$Tissue_Corrected = relevel(factor(data$Tissue_Corrected), 'Colon') data$Genotype = relevel(factor(data$Genotype), 'WT') data$Mouse_Age_Days = as.numeric(data$Mouse_Age_Days) data$Mouse_Age_Months = as.numeric(data$Mouse_Age_Months) ## ----feature-engineering-linear-model----------------------------------------- model = lm(Mouse_Age_Days ~ Sex + Strain_Corrected + Tissue_Corrected + Genotype + `CGI-mean` + `CTCF-mean` + `Clock-mean` + `SNP-mean` + `SpermMeth-mean` + `VMR-mean`, data=data) ## ----------------------------------------------------------------------------- sessionInfo()