## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----install, eval = FALSE----------------------------------------------------
#  if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager", repos = "https://cran.r-project.org")

## ----install-Bioconductor, eval = FALSE---------------------------------------
#  if (BiocManager::version() >= "3.19") {
#      BiocManager::install("AlphaMissenseR")
#  } else {
#      stop(
#          "'AlphaMissenseR' requires Bioconductor version 3.19 or later, ",
#          "install from GitHub?"
#      )
#  }

## ----install-devel, eval = FALSE----------------------------------------------
#  remotes::install_github(
#      "mtmorgan/AlphaMissenseR",
#      repos = BiocManager::repositories()
#  )

## ----setup, message = FALSE---------------------------------------------------
library(AlphaMissenseR)

## ----am_browse, eval = FALSE--------------------------------------------------
#  am_browse()

## ----am_available-------------------------------------------------------------
am_available()

## ----am_data------------------------------------------------------------------
tbl <- am_data("hg38")
tbl

## ----db_connect---------------------------------------------------------------
db <- db_connect()

## ----am_data-duckdb-----------------------------------------------------------
db_tables(db)

tbl <- tbl(db, "hg38")
tbl

## ----db-am_class--------------------------------------------------------------
tbl |>
    count(am_class)

## ----db-pathogenicity---------------------------------------------------------
tbl |>
    group_by(am_class) |>
    summarize(n = n(), pathogenecity = mean(am_pathogenicity, na.rm = TRUE))

## ----REF-ALT------------------------------------------------------------------
tbl |>
    count(REF, ALT) |>
    tidyr::pivot_wider(names_from = "ALT", values_from = "n") |>
    select("REF", "A", "C", "G", "T") |>
    arrange(REF)

## -----------------------------------------------------------------------------
tbl |>
    filter(CHROM == "chr4", POS > 0, POS <= 200000)

## ----eval = FALSE-------------------------------------------------------------
#  if (!requireNamespace("GenomicRanges", quietly = TRUE))
#      BiocManager::install("GenomicRanges")

## -----------------------------------------------------------------------------
tbl <-
    am_data("hg38") |>
    filter(CHROM == "chr2", POS < 10000000, REF == "G")

## -----------------------------------------------------------------------------
gpos <-
    tbl |>
    to_GPos()
gpos

## ----eval = FALSE-------------------------------------------------------------
#  utils::browseVignettes("GenomicRanges")

## ----ahub-homo-sap------------------------------------------------------------
hub <- AnnotationHub::AnnotationHub()
AnnotationHub::query(hub, c("EnsDb", "Homo sapiens"))

AnnotationHub::AnnotationHub()["AH113665"]

## ----AnnotationHub, message = FALSE-------------------------------------------
library(ensembldb)
edb <- AnnotationHub::AnnotationHub()[["AH113665"]]
edb

## ----db_rw--------------------------------------------------------------------
db_rw <- db_connect(read_only = FALSE)

## ----tx-----------------------------------------------------------------------
bcl2l11 <-
    edb |>
    ensembldb::filter(
        ~ symbol == "BCL2L11" &
            tx_biotype == "protein_coding" &
            tx_is_canonical == TRUE
    ) |>
    exonsBy("tx")
bcl2l11

## ----temp-table---------------------------------------------------------------
bcl2l11_tbl <-
    bcl2l11 |>
    dplyr::as_tibble() |>
    dplyr::mutate(CHROM = paste0("chr", seqnames)) |>
    dplyr::select(CHROM, everything(), -seqnames)

db_temporary_table(db_rw, bcl2l11_tbl, "bcl2l11")

## ----db_tables-rw-------------------------------------------------------------
"bcl2l11" %in% db_tables(db_rw)

## ----range-join---------------------------------------------------------------
rng <- db_range_join(db_rw, "hg38", "bcl2l11", "bcl2l11_overlaps")
rng

## -----------------------------------------------------------------------------
rng |>
    dplyr::count(exon_id, am_class) |>
    tidyr::pivot_wider(names_from = "am_class", values_from = "n")

## ----db_disconnect-rw---------------------------------------------------------
db_disconnect(db_rw)

## ----filter-variants----------------------------------------------------------
variants_of_interest <-
    am_data("hg38") |>
    dplyr::filter(transcript_id %like% "ENST00000393256%")

## ----gpos---------------------------------------------------------------------
gpos <-
    variants_of_interest |>
    to_GPos()
## make gpos 'genome' and 'seqlevels' like bcl2l11
GenomeInfoDb::genome(gpos) <- "GRCh38"
GenomeInfoDb::seqlevelsStyle(gpos) <- "Ensembl"
gpos

## ----countOverlaps------------------------------------------------------------
countOverlaps(unlist(bcl2l11), gpos)

## ----db_disconnect_all--------------------------------------------------------
db_disconnect_all()

## -----------------------------------------------------------------------------
sessionInfo()