--- title: "Import GWAS summary statistics from Open GWAS" author: "

Authors: Alan Murphy, Brian Schilder and Nathan Skene

" date: "

Updated: `r format(Sys.Date(), '%b-%d-%Y')`

" csl: nature.csl output: BiocStyle::html_document: vignette: > %\VignetteIndexEntry{OpenGWAS} %\usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} --- ```{r style, echo=FALSE} knitr::opts_chunk$set(tidy = FALSE, message = FALSE) ``` ```{r} library(MungeSumstats) ``` MungeSumstats now offers high throughput query and import functionality to data from the MRC IEU [Open GWAS Project](https://gwas.mrcieu.ac.uk/). # Find GWAS datasets ```{r,eval=FALSE} #### Search for datasets #### metagwas <- MungeSumstats::find_sumstats(traits = c("parkinson","alzheimer"), min_sample_size = 1000) head(metagwas,3) ids <- (dplyr::arrange(metagwas, nsnp))$id ``` ```{r,echo=FALSE} #### Search for datasets #### #error_dwnld <- # tryCatch( # metagwas <- MungeSumstats::find_sumstats(traits = c("parkinson", # "alzheimer"), # min_sample_size = 1000), # error = function(e) e, # warning = function(w) w # ) #if(exists("metagwas")&&is.data.frame(metagwas)){#if exists downloaded fine # head(metagwas,3) # ids <- (dplyr::arrange(metagwas, nsnp))$id #} #to speed up build just create results metagwas2 <- data.frame( id=c("ieu-a-298","ieu-b-2","ieu-a-297"), trait = rep("Alzheimer's disease",3), group_name = rep("public",3), year=c(2013, 2019, 2013), author=c("Lambert","Kunkle BW","Lambert"), consortium=c("IGAP", "Alzheimer Disease Genetics Consortium (ADGC), European Alzheimer's Disease Initiative (EADI), Cohorts for Heart and Aging Research in Genomic Epidemiology Consortium (CHARGE), Genetic and Environmental Risk in AD/Defining Genetic, Polygenic and Environmental Risk for Alzheimer's Disease Consortium (GERAD/PERADES),", "IGAP" ), sex=rep("Males and Females",3), population=rep("European",3), unit=c("log odds","NA","log odds"), nsnp=c(11633,10528610,7055882), sample_size=c(74046,63926,54162), build=rep("HG19/GRCh37",3), category=c("Disease","Binary","Disease"), subcategory=rep("Psychiatric / neurological",3), ontology=rep("NA",3), mr=rep(1,3), priority=c(1,0,2), pmid=c(24162737,30820047,24162737), sd=rep(NA,3), note=c("Exposure only; Effect allele frequencies are missing; forward(+) strand", "NA","Effect allele frequencies are missing; forward(+) strand"), ncase=c(25580,21982,17008), ncontrol=c(48466,41944,37154), N=c(74046,63926,54162) ) print(head(metagwas2,3)) ``` # Import full results You can supply `import_sumstats()` with a list of as many OpenGWAS IDs as you want, but we'll just give one to save time. ```{r,eval=FALSE} datasets <- MungeSumstats::import_sumstats(ids = "ieu-a-298", ref_genome = "GRCH37") ``` ```{r,echo=FALSE} #don't actually run as this takes some time, use stashed version datasets <- list("ieu-a-298"=paste0(tempdir(),"/ieu-a-298.tsv.gz")) #stashed value datasets_data <- list("ieu-a-298"=system.file("extdata","ieu-a-298.tsv.gz", package="MungeSumstats")) ``` ## Summarise results By default, `import_sumstats` results a named list where the names are the Open GWAS dataset IDs and the items are the respective paths to the formatted summary statistics. ```{r} print(datasets) ``` You can easily turn this into a data.frame as well. ```{r} results_df <- data.frame(id=names(datasets), path=unlist(datasets)) print(results_df) ``` # Import full results (parallel) *Optional*: Speed up with multi-threaded download via [axel](https://github.com/axel-download-accelerator/axel). ```{r, eval=FALSE} datasets <- MungeSumstats::import_sumstats(ids = ids, vcf_download = TRUE, download_method = "axel", nThread = max(2,future::availableCores()-2)) ``` # Further functionality See the [Getting started vignette](https://neurogenomics.github.io/MungeSumstats/articles/MungeSumstats.html) for more information on how to use MungeSumstats and its functionality. # Session Info

```{r} utils::sessionInfo() ```