## Script extracted from a knitr document: each `## ----` line below is a
## chunk header, and lines prefixed with `# ` are chunks that were marked
## `eval=FALSE` (shown to the reader but not executed).

## ---- echo=FALSE, warning=FALSE--------------------------------------------
suppressPackageStartupMessages({
    library(TCGAutils)
    library(curatedTCGAData)
    library(MultiAssayExperiment)
    library(RTCGAToolbox)
    library(BiocFileCache)
    library(rtracklayer)
    library(R.utils)
})

## ---- eval=FALSE-----------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#
#  BiocManager::install("TCGAutils")

## ---- eval=FALSE-----------------------------------------------------------
#  library(TCGAutils)
#  library(curatedTCGAData)
#  library(MultiAssayExperiment)
#  library(RTCGAToolbox)
#  library(BiocFileCache)
#  library(rtracklayer)
#  library(R.utils)

## ---- echo = FALSE---------------------------------------------------------
## Same call as the displayed chunk below, with messages silenced
suppressMessages({
    coad <- curatedTCGAData::curatedTCGAData(
        diseaseCode = "COAD",
        assays = c("CNASeq", "Mutation", "miRNA*", "RNASeq2*", "mRNAArray"),
        dry.run = FALSE
    )
})

## ---- eval = FALSE---------------------------------------------------------
#  coad <- curatedTCGAData::curatedTCGAData(diseaseCode = "COAD",
#      assays = c("CNASeq", "Mutation", "miRNA*", "RNASeq2*", "mRNAArray"),
#      dry.run = FALSE)

## --------------------------------------------------------------------------
curatedTCGAData("COAD", "*")

## --------------------------------------------------------------------------
sampleTables(coad)

## --------------------------------------------------------------------------
data("sampleTypes")
head(sampleTypes)

## --------------------------------------------------------------------------
## Split the assays by the sample codes "01" and "11"
(tnmae <- splitAssays(coad, c("01", "11")))

## --------------------------------------------------------------------------
## Coerce experiments 4, 6 and 7 of the split object
(matchmae <- as(tnmae[, , c(4, 6, 7)], "MatchedAssayExperiment"))

## --------------------------------------------------------------------------
getSubtypeMap(coad)

## --------------------------------------------------------------------------
getClinicalNames("COAD")

## --------------------------------------------------------------------------
## Compare the two `vital_status` columns present in the colData
class(colData(coad)[["vital_status.x"]])
class(colData(coad)[["vital_status.y"]])

table(colData(coad)[["vital_status.x"]])
table(colData(coad)[["vital_status.y"]])

## --------------------------------------------------------------------------
mergeColData(coad, DataFrame(race_f = factor(colData(coad)[["race"]])))

## --------------------------------------------------------------------------
mircoad <- mirToRanges(coad)

## --------------------------------------------------------------------------
## Translate the genome build labels of experiment 4, then switch to UCSC
## style sequence names
genome(coad[[4]]) <- vapply(genome(coad[[4]]), translateBuild, character(1L))
seqlevelsStyle(coad[[4]]) <- "UCSC"
genome(coad[[4]])

## --------------------------------------------------------------------------
lifturl <-
    "http://hgdownload.cse.ucsc.edu/goldenpath/hg18/liftOver/hg18ToHg19.over.chain.gz"
bfc <- BiocFileCache()

## Query the cache once and keep only entries whose file is still on disk;
## filtering first avoids handing a length > 1 vector to `&&`, which is an
## error in R >= 4.3.
qfile <- bfcquery(bfc, "18to19chain", exact = TRUE)[["rpath"]]
qfile <- qfile[file.exists(qfile)]
cfile <- if (length(qfile) > 0L) {
    qfile[[1L]]
} else {
    bfcadd(bfc, "18to19chain", lifturl)
}

## Strip only the trailing ".gz" extension for the decompressed copy
chainfile <- file.path(tempdir(), sub("\\.gz$", "", basename(cfile)))
## `overwrite = TRUE` lets this chunk be re-run within the same session,
## where the decompressed file already exists in tempdir()
R.utils::gunzip(cfile, destname = chainfile, remove = FALSE,
    overwrite = TRUE)

chain <- suppressMessages(
    rtracklayer::import.chain(chainfile)
)

ranges19 <- rtracklayer::liftOver(rowRanges(coad[[4]]), chain)

## --------------------------------------------------------------------------
## Keep only the rows that received at least one lifted-over range, then
## install the new ranges
rag <- coad[[4]][as.logical(lengths(ranges19))]
rowRanges(rag) <- unlist(ranges19)
coad[[4]] <- rag

## --------------------------------------------------------------------------
coad <- qreduceTCGA(coad)

## --------------------------------------------------------------------------
symbolsToRanges(coad)

## --------------------------------------------------------------------------
library(GenomicDataCommons)

queso <- files(legacy = TRUE) %>%
    filter(
        ~ cases.project.project_id == "TCGA-COAD" &
            data_category == "Gene expression" &
            data_type == "Exon quantification"
    )

gdc_set_cache(directory = tempdir())

## --------------------------------------------------------------------------
## Skipped on Windows
if (!identical(.Platform$OS.type, "windows")) {
    qu <- manifest(queso)
    qq <- gdcdata(qu$id[1:4])

    makeGRangesListFromExonFiles(qq, nrows = 4)
}

## --------------------------------------------------------------------------
## Load example file found in package
pkgDir <- system.file("extdata", package = "TCGAutils", mustWork = TRUE)
exonFile <- list.files(pkgDir, pattern = "cation\\.txt$", full.names = TRUE)
exonFile

## We add the original file prefix to query for the UUID and get the
## TCGAbarcode
filePrefix <- "unc.edu.32741f9a-9fec-441f-96b4-e504e62c5362.1755371."

## Add actual file name manually
makeGRangesListFromExonFiles(exonFile,
    fileNames = paste0(filePrefix, basename(exonFile)))

## --------------------------------------------------------------------------
grlFile <- system.file("extdata", "blca_cnaseq.txt", package = "TCGAutils")
grl <- read.table(grlFile)
head(grl)

makeGRangesListFromCopyNumber(grl, split.field = "Sample")

makeGRangesListFromCopyNumber(grl, split.field = "Sample",
    keep.extra.columns = TRUE)

## --------------------------------------------------------------------------
tempDIR <- tempdir()
co <- getFirehoseData("COAD", clinical = FALSE, GISTIC = TRUE,
    destdir = tempDIR)

selectType(co, "GISTIC")
class(selectType(co, "GISTIC"))

makeSummarizedExperimentFromGISTIC(co, "Peaks")

## --------------------------------------------------------------------------
## First four column names of the CNASeq experiment, reused as example
## barcodes in the chunks below
(xbarcode <- head(colnames(coad)[["COAD_CNASeq-20160128_simplified"]], 4L))
barcodeToUUID(xbarcode)

## --------------------------------------------------------------------------
UUIDtoBarcode("ae55b2d3-62a1-419e-9f9a-5ddfac356db4", from_type = "case_id")

## --------------------------------------------------------------------------
UUIDtoBarcode("0001801b-54b0-4551-8d7a-d66fb59429bf", from_type = "file_id")

## --------------------------------------------------------------------------
head(UUIDtoUUID("ae55b2d3-62a1-419e-9f9a-5ddfac356db4", to_type = "file_id"))

## --------------------------------------------------------------------------
## Return participant barcodes
TCGAbarcode(xbarcode, participant = TRUE)

## Just return samples
TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Include sample data as well
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE)

## Include portion and analyte data
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE, portion = TRUE)

## --------------------------------------------------------------------------
## Select primary solid tumors
TCGAsampleSelect(xbarcode, "01")

## Select blood derived normals
TCGAsampleSelect(xbarcode, "10")

## --------------------------------------------------------------------------
TCGAbiospec(xbarcode)

## --------------------------------------------------------------------------
## Obtained previously
sampleCodes <- TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Lookup table
head(sampleTypes)

## Match codes found in the barcode to the lookup table
sampleTypes[match(unique(substr(sampleCodes, 1L, 2L)), sampleTypes[["Code"]]), ]

## --------------------------------------------------------------------------
data("clinicalNames")

clinicalNames

lengths(clinicalNames)

## --------------------------------------------------------------------------
sessionInfo()