## ----install, eval=FALSE------------------------------------------------- # ## try http:// if https:// URLs are not supported # source("https://bioconductor.org/biocLite.R") # biocLite("psichomics") ## ----load, message=FALSE------------------------------------------------- library(psichomics) ## ----TCGA options-------------------------------------------------------- # Available tumour types cohorts <- getFirebrowseCohorts() # Available sample dates date <- getFirebrowseDates() # Available data types dataTypes <- getFirebrowseDataTypes() ## ----download, eval=FALSE------------------------------------------------ # # Set download folder # folder <- getDownloadsFolder() # # # Download and load most recent junction quantification and clinical data from # # TCGA/Firebrowse for Adrenocortical Carcinoma # data <- loadFirebrowseData(folder=folder, # cohort="BRCA", # data=c("Clinical", "junction_quantification"), # date="2016-01-28") # # # Select clinical and junction quantification dataset # clinical <- data[[1]]$`Clinical data` # junctionQuant <- data[[1]]$`Junction quantification (Illumina HiSeq)` ## ----prepare examples, include=FALSE------------------------------------- clinical <- readRDS("BRCA_clinical.RDS") ## ----load local, eval=FALSE---------------------------------------------- # folder <- "~/Downloads/GTEx/" # ignore <- c(".aux.", ".mage-tab.") # data <- loadLocalFiles(folder, ignore=ignore) # # # Select clinical and junction quantification dataset # clinical <- data[[1]]$`Clinical data` # junctionQuant <- data[[1]]$`Junction quantification (Illumina HiSeq)` ## ----quantify options---------------------------------------------------- # Available alternative splicing annotation annotList <- listSplicingAnnotations() annotList ## ----prepare to quantify splicing, eval=FALSE---------------------------- # # Load Human (hg19/GRCh37 assembly) annotation # human <- listSplicingAnnotations()[[1]] # annotation <- loadAnnotation(human) ## ----event types--------------------------------------------------------- # Available alternative splicing event types (skipped exon, alternative # first/last exon, mutually exclusive exons, etc.) eventType <- getSplicingEventTypes() eventType eventType <- "SE" ## ----quantify splicing, eval=FALSE--------------------------------------- # # Min. reads threshold: number of reads required to quantify a splicing event # minReads <- 10 # # psi <- quantifySplicing(annotation, junctionQuant, eventType=eventType, # minReads=minReads) ## ----load splicing, echo=FALSE------------------------------------------- psi <- readRDS("BRCA_psi.RDS") ## ----check splicing events----------------------------------------------- # Check the identifier of the splicing events in the resulting table events <- rownames(psi) head(events) ## ----survival groups----------------------------------------------------- # Get available groups in clinical data cols <- colnames(clinical) # Check the name from which to retrieve groups of interest er_expr <- grep("estrogen_receptor_status", cols, value=TRUE)[1] er_expr <- createGroupByAttribute(col=er_expr, dataset=clinical) # Discard values other than "positive" and "negative" for ER expression er_expr <- er_expr[c("positive", "negative")] ############################################################################ # Now the same for patients treated with tamoxifen. However, TCGA presents # # data for the administred drugs spread through multiple columns, so... # ############################################################################ # Look for the appropriate columns with the drugs administred drug_name <- grep("drug_name", cols, value=TRUE)[1:23] # Using the previous columns, look for either "tamoxifen" or "tamoxiphen" tamoxifen <- lapply(drug_name, function(i) grep("tamoxi.*", clinical[ , i])) # Collect all previous results and create a single list named tamoxifen tamoxifen <- sort(unique(unlist(tamoxifen))) tamoxifen <- list("tamoxifen"=tamoxifen) # Combine all previously created groups groups <- c(er_expr, tamoxifen) # Assign each patient to a group patients <- nrow(clinical) g <- groupPerPatient(groups[c("tamoxifen", "positive")], patients) # Append the created groups to the clinical dataset cl <- cbind(clinical, groups=g) ## ----help formula, eval=FALSE-------------------------------------------- # help(formula) ## ----survival by er expression and tamoxifen----------------------------- # Create the right-hand side of a formula formulaStr <- "groups" # Events are retrieved based on the information available for time to event: if # there is info for a patient, the event is considered as occurred daysToDeath <- "days_to_death" survTerms <- processSurvTerms(cl, censoring="right", event=daysToDeath, timeStart=daysToDeath, formulaStr=formulaStr, scale="years") require("survival") surv <- survfit(survTerms) pvalue <- testSurvival(survTerms) plotSurvivalCurves(surv, pvalue=pvalue) ## ----cox model----------------------------------------------------------- survTermsCox <- processSurvTerms(cl, censoring="right", event=daysToDeath, timeStart=daysToDeath, formulaStr=formulaStr, coxph=TRUE) summary(survTermsCox) ## ----plot PCA variance--------------------------------------------------- # Percentage of missing values tolerated by splicing event: tolerated missing # values are replaced with the median value of that splicing event naTolerance <- 0 # Center bu do not scale values (they are already scaled between 0 and 1) center <- TRUE scale <- FALSE # Match patients with samples samples <- colnames(psi) match <- getPatientFromSample(samples, clinical) # Filter alternative splicing quantification to colour values by positive or # negative ER expression erGroups <- getMatchingSamples(groups[c("positive", "negative")], samples, clinical, match=match) filtered_psi <- psi[ , unlist(erGroups)] # Perform principal component analysis (transpose alternative splicing # quantification to have samples as rows) pca <- performPCA(t(filtered_psi), center=center, scale.=scale, naTolerance=naTolerance) # Plot the variance explained by each principal component plotVariance(pca) ## ----plot PCA------------------------------------------------------------ plotPCA(pca, pcX="PC1", pcY="PC2", erGroups) ## ----plot loadings------------------------------------------------------- plotPCA(pca, pcX="PC1", pcY="PC2", individuals = FALSE, loadings = TRUE) ## ----diff splicing 1----------------------------------------------------- # Choose a method to use when adjusting p-values ("none" is valid) # help(p.adjust.methods) pvalueAdjust <- "BH" # Check sample types available (normal, tumour, etc.) types <- parseSampleGroups(colnames(psi)) unique(types) # Analyse by sample types (by default) and perform all statistical analyses (by # default) event <- "SE_10_+_79797062_79799962_79799983_79800373_RPS24" eventPSI <- psi[event, ] stats <- diffAnalyses(eventPSI, pvalueAdjust=pvalueAdjust, analyses = c("wilcoxRankSum", "ttest", "kruskal", "levene", "fligner")) # View(stats) plotDistribution(as.numeric(eventPSI), groups=types) ## ----diff splicing 2----------------------------------------------------- filter <- grep("Tumor|Normal", types) gr_event <- types[filter] eventPSI <- eventPSI[filter] stats <- diffAnalyses(eventPSI, groups=gr_event, pvalueAdjust=pvalueAdjust, analyses = c("wilcoxRankSum", "ttest", "kruskal", "levene", "fligner")) # View(stats) plotDistribution(as.numeric(eventPSI), groups=gr_event) ## ----diff splicing 3----------------------------------------------------- # Filter alternative splicing quantification by positive or negative ER # expression; match between patient information and samples as such: erGroups <- getMatchingSamples(groups[c("negative", "positive")], samples, clinical, match=match) eventPSI <- psi[event, unlist(erGroups)] erGroups <- rep(names(erGroups), sapply(erGroups, length)) stats <- diffAnalyses(eventPSI, groups=erGroups, pvalueAdjust=pvalueAdjust, analyses = c("wilcoxRankSum", "ttest", "kruskal", "levene", "fligner")) # View(stats) plotDistribution(as.numeric(eventPSI), groups=erGroups) ## ----splicing event data------------------------------------------------- event <- "SE_10_+_79797062_79799962_79799983_79800373_RPS24" # Assign alternative splicing quantification to patients based on their samples clinicalPSI <- getPSIperPatient(psi, match, clinical) eventPSI <- as.numeric(clinicalPSI[event, ]) # Statistics of the alternative splicing event's quantification summary(eventPSI) ## ----optimal cut-off----------------------------------------------------- opt <- optimalPSIcutoff(clinical, eventPSI, censoring="right", event="days_to_death", timeStart="days_to_death") optimalCutoff <- opt$par # Optimal exon inclusion level optimalCutoff optimalPvalue <- opt$value # Respective p-value optimalPvalue ## ------------------------------------------------------------------------ cutoff <- optimalCutoff group <- labelBasedOnCutoff(eventPSI, cutoff, label="PSI values") survTerms <- processSurvTerms(clinical, censoring="right", event="days_to_death", timeStart="days_to_death", group=group) require("survival") surv <- survfit(survTerms) pvalue <- testSurvival(survTerms) plotSurvivalCurves(surv, pvalue=pvalue) ## ----plot transcripts---------------------------------------------------- parsed <- parseSplicingEvent(event) info <- queryEnsemblByEvent(event, species="human", assembly="hg19") plotTranscripts(info, parsed$pos[[1]]) ## ----plot proteins------------------------------------------------------- parsed <- parseSplicingEvent(event) info <- queryEnsemblByEvent(event, species="human", assembly="hg19") # Some of these transcripts have no corresponding protein proteins <- info$Transcript$Translation$id protein <- proteins[[6]] # Convert protein identifier from ENSEMBL to UniProt uniprot <- ensemblToUniprot(protein) uniprot uniprot <- uniprot[[1]] plotProtein(uniprot) ## ----exploring diff splicing--------------------------------------------- # Filter alternative splicing quantification by positive or negative ER # expression; match between patient information and samples as such: erGroups <- getMatchingSamples(groups[c("negative", "positive")], samples, clinical, match=match) eventsPSI <- psi[ , unlist(erGroups)] erGroups <- rep(names(erGroups), sapply(erGroups, length)) diff <- diffAnalyses(eventsPSI, erGroups) # View(diff) ## ----survival analysis on filtered events, results="hide"---------------- # Filter statistically significant events as you see fit filter <- diff$`Wilcoxon p-value (BH adjusted)` <= 0.05 filter <- filter[!is.na(filter)] events <- rownames(diff[filter, ]) # Assign alternative splicing quantification to patients based on their samples clinicalPSI <- getPSIperPatient(psi, match, clinical) # Get time for all survival analyses of interest (the same for all, so this is # faster) daysToDeath <- "days_to_death" survTime <- getColumnsTime(clinical, event=daysToDeath, timeStart=daysToDeath) # Prepare progress bar min <- 1 max <- nrow(clinicalPSI) pb <- txtProgressBar(min, max, "Calculating optimal PSI cut-off", style=3) # Prepare empty vectors where information will be stored (faster) optimalCutoff <- rep(NA, max) optimalPvalue <- rep(NA, max) # Retrieve the optimal PSI cut-off and respective p-value for multiple events for (row in min:max) { singlePSI <- as.numeric(clinicalPSI[row, ]) opt <- optimalPSIcutoff(clinical, singlePSI, censoring="right", event=daysToDeath, timeStart=daysToDeath, survTime=survTime) optimalCutoff[row] <- opt$par # Optimal exon inclusion level optimalPvalue[row] <- opt$value # Respective p-value setTxtProgressBar(pb, row) } # Bind columns to differential splicing analysis if desired diff <- cbind(diff, optimalCutoff, optimalPvalue) # View(diff)