## ----setup, echo=FALSE----------------------------------------------------- knitr::opts_chunk$set(collapse=TRUE, comment = "#>") suppressPackageStartupMessages(library(universalmotif)) suppressPackageStartupMessages(library(Biostrings)) suppressMessages(suppressPackageStartupMessages(library(MotifDb))) data(ArabidopsisPromoters) data(ArabidopsisMotif) ## -------------------------------------------------------------------------- library(universalmotif) motif <- create_motif("CWWWWCC", nsites = 6) sequences <- DNAStringSet(rep(c("CAAAACC", "CTTTTCC"), 3)) motif.k2 <- add_multifreq(motif, sequences, add.k = 2) # Or: # motif.k2 <- create_motif(sequences, add.multifreq = 2) motif.k2 ## -------------------------------------------------------------------------- library(universalmotif) data(ArabidopsisMotif) data(ArabidopsisPromoters) enrich_motifs(ArabidopsisMotif, ArabidopsisPromoters, shuffle.k = 3, verbose = 0, progress = FALSE, RC = TRUE) ## -------------------------------------------------------------------------- library(universalmotif) data(ArabidopsisMotif) data(ArabidopsisPromoters) enrich_motifs(ArabidopsisMotif, ArabidopsisPromoters, shuffle.k = 3, search.mode = "positional", verbose = 0, progress = FALSE, RC = TRUE) ## -------------------------------------------------------------------------- library(universalmotif) m <- matrix(c(0.10,0.27,0.23,0.19,0.29,0.28,0.51,0.12,0.34,0.26, 0.36,0.29,0.51,0.38,0.23,0.16,0.17,0.21,0.23,0.36, 0.45,0.05,0.02,0.13,0.27,0.38,0.26,0.38,0.12,0.31, 0.09,0.40,0.24,0.30,0.21,0.19,0.05,0.30,0.31,0.08), byrow=TRUE,nrow=4) motif <- create_motif(m, alphabet = "DNA", type = "PWM") motif ## -------------------------------------------------------------------------- data(ArabidopsisPromoters) res <- scan_sequences(motif, ArabidopsisPromoters, verbose = 0, progress = FALSE, threshold = 0.6, threshold.type = "logodds") head(res) ## -------------------------------------------------------------------------- ## The first match was CTCTTTATTC, with a score of 3.931 (max possible = 6.536) library(Biostrings) bkg <- colMeans(oligonucleotideFrequency(ArabidopsisPromoters, 1, as.prob = TRUE)) bkg ## -------------------------------------------------------------------------- hit.prob <- bkg["A"]^1 * bkg["C"]^3 * bkg["G"]^0 * bkg["T"]^6 hit.prob <- unname(hit.prob) hit.prob ## -------------------------------------------------------------------------- pvals <- motif_pvalue(motif, res$score, progress = FALSE, bkg.probs = bkg) res2 <- data.frame(motif=res$motif, match=res$match, pval=pvals)[order(pvals), ] knitr::kable(head(res2), digits = 22, row.names = FALSE, format = "markdown") ## -------------------------------------------------------------------------- scores <- c(-6, -3, 0, 3, 6) k <- c(2, 4, 6, 8, 10) out <- data.frame(k = c(2, 4, 6, 8, 10), score.minus6 = rep(0, 5), score.minus3 = rep(0, 5), score.0 = rep(0, 5), score.3 = rep(0, 5), score.6 = rep(0, 5)) for (i in seq_along(scores)) { for (j in seq_along(k)) { out[j, i + 1] <- motif_pvalue(motif, scores[i], k = k[j], progress = FALSE, bkg.probs = bkg) } } knitr::kable(out, format = "markdown", digits = 10) ## -------------------------------------------------------------------------- r <- motif_pvalue(motif, pvalue = c(0.01, 0.001, 0.0001, 0.00001), progress = FALSE, bkg.probs = bkg, k = 10) ## -------------------------------------------------------------------------- r2 <- motif_pvalue(motif, score = r, progress = FALSE, bkg.probs = bkg, k = 10) res <- data.frame(pval=c(0.01, 0.001, 0.0001, 0.00001), score = r, pval.calc = r2) knitr::kable(res, format = "markdown", digits = 22) ## -------------------------------------------------------------------------- library(universalmotif) library(MotifDb) motifs <- convert_motifs(MotifDb) motifs <- filter_motifs(motifs, altname = c("M0003_1.02", "M0004_1.02")) summarise_motifs(motifs) try_all <- function(motifs, ...) { scores <- numeric(8) methods <- c("MPCC", "PCC", "MEUCL", "EUCL", "MSW", "SW", "MKL", "KL") for (i in 1:8) { scores[i] <- compare_motifs(motifs, method = methods[i], progress = FALSE, ...)[1, 2] } res <- data.frame(method = c("MPCC", "PCC", "MEUCL", "EUCL", "MSW", "SW", "MKL", "KL"), score = scores, max = c(1, NA, sqrt(2), NA, 2, NA, NA, NA), type = c("similarity", "similarity", "distance", "distance", "similarity", "similarity", "distance", "distance")) knitr::kable(res, format = "markdown") } try_all(motifs) ## -------------------------------------------------------------------------- try_all(motifs, tryRC = FALSE) ## -------------------------------------------------------------------------- try_all(motifs, use.type = "ICM") ## -------------------------------------------------------------------------- try_all(motifs, normalise.scores = TRUE) ## -------------------------------------------------------------------------- view_motifs(motifs) ## -------------------------------------------------------------------------- view_motifs(motifs, min.overlap = 99) try_all(motifs, min.overlap = 99) try_all(motifs, min.overlap = 99, normalise.scores = TRUE) ## -------------------------------------------------------------------------- try_all(motifs, min.overlap = 1) view_motifs(motifs, min.overlap = 1) ## -------------------------------------------------------------------------- motifs2 <- filter_motifs(MotifDb, altname = c("M0100_1.02", "M0104_1.02")) view_motifs(motifs2, min.mean.ic = 0) try_all(motifs2, min.mean.ic = 0) view_motifs(motifs2, min.mean.ic = 0.7) try_all(motifs2, min.mean.ic = 0.7) ## -------------------------------------------------------------------------- library(universalmotif) library(MotifDb) motifs <- convert_motifs(MotifDb) motifs <- filter_motifs(motifs, organism = "Athaliana")[1:100] tree <- motif_tree(motifs, layout = "daylight", linecol = "family", progress = FALSE) ## Make some changes to the tree in regular ggplot2 fashion: # tree <- tree + ... tree ## ----eval=FALSE------------------------------------------------------------ # library(universalmotif) # # ## 1. Once per session: via `options` # # options(meme.bin = "/path/to/meme/bin/meme") # # run_meme(...) # # ## 2. Once per run: via `run_meme` # # run_meme(..., bin = "/path/to/meme/bin/meme") ## ----eval=FALSE------------------------------------------------------------ # library(universalmotif) # data(ArabidopsisPromoters) # # ## 1. Read sequences from disk (in fasta format): # # library(Biostrings) # # # The following `read*` functions are available in Biostrings: # # DNA: readDNAStringSet # # DNA with quality scores: readQualityScaledDNAStringSet # # RNA: readRNAStringSet # # amino acid: readAAStringSet # # any: readBStringSet # # sequences <- readDNAStringSet("/path/to/sequences.fasta") # # run_meme(sequences, ...) # # ## 2. Extract from a `BSgenome` object: # # library(GenomicFeatures) # library(TxDb.Athaliana.BioMart.plantsmart28) # library(BSgenome.Athaliana.TAIR.TAIR9) # # # Let us retrieve the same promoter sequences from ArabidopsisPromoters: # gene.names <- names(ArabidopsisPromoters) # # # First get the transcript coordinates from the relevant `TxDb` object: # transcripts <- transcriptsBy(TxDb.Athaliana.BioMart.plantsmart28, # by = "gene")[gene.names] # # # There are multiple transcripts per gene, we only care for the first one # # in each: # # transcripts <- lapply(transcripts, function(x) x[1]) # transcripts <- unlist(GRangesList(transcripts)) # # # Then the actual sequences: # # # Unfortunately this is a case where the chromosome names do not match # # between the two databases # # seqlevels(TxDb.Athaliana.BioMart.plantsmart28) # #> [1] "1" "2" "3" "4" "5" "Mt" "Pt" # seqlevels(BSgenome.Athaliana.TAIR.TAIR9) # #> [1] "Chr1" "Chr2" "Chr3" "Chr4" "Chr5" "ChrM" "ChrC" # # # So we must first rename the chromosomes in `transcripts`: # seqlevels(transcripts) <- seqlevels(BSgenome.Athaliana.TAIR.TAIR9) # # # Finally we can extract the sequences # promoters <- getPromoterSeq(transcripts, # BSgenome.Athaliana.TAIR.TAIR9, # upstream = 0, downstream = 1000) # # run_meme(promoters, ...) ## ----eval=FALSE------------------------------------------------------------ # run_meme(sequences, output = "/path/to/desired/output/folder") ## ----sessionInfo, echo=FALSE----------------------------------------------- sessionInfo()