# Example workflow for the 'LINC' package: co-expression analysis of lincRNAs.
#
# NOTE(review): the '## ----<options>----' lines look like knitr chunk headers
# (this file is presumably knitr::purl() output) -- confirm before hand-editing.
# Chunks whose header says 'eval = FALSE' are kept as '## ' comments: they are
# shown for illustration only (code plus example console output) and are not
# executed by this script.

## ----include=FALSE, cache=FALSE------------------------------------------
# library() stops with an error if a package is missing, whereas require()
# would only return FALSE and let the script fail later at an unrelated call.
library(png)
library(grid)
library(gridExtra)
suppressMessages(library(LINC))
data(BRAIN_EXPR)

## ----eval = FALSE--------------------------------------------------------
## str(GTEX_CRBL)
##
##  num [1:56318, 1:117] 0.0475 10.7799 0.105 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:56318] "ENSG00000223972" "ENSG00000227232" "ENSG00000243485" "ENSG00000237613" ...
##   ..$ : chr [1:117] "GTEX-117XS-3126-SM-5GIDP" "GTEX-1192X-3226-SM-5987D" "GTEX-11DXW-1026-SM-5H11K"
##   "GTEX-11DXY-3126-SM-5N9BT" ...
##

## ----eval = FALSE--------------------------------------------------------
## # STEP 1: select the high-variance genes
## var_index <- order(apply(GTEX_CRBL, 1, var), decreasing = TRUE)
## GTEX_CRBL_HVAR <- GTEX_CRBL[var_index[1:5000], ]
##
## # STEP 2: get the gene biotype
## library(biomaRt)
## ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
## biotype <- getBM(attributes=c('gene_biotype',
##                               'ensembl_gene_id'),
##                  filters = 'ensembl_gene_id',
##                  values = rownames(GTEX_CRBL_HVAR),
##                  mart = ensembl)
## # STEP 3:
## index <- match(rownames(GTEX_CRBL_HVAR), biotype$ensembl_gene_id)
## GTEX_CRBL_BIOTYPE <- biotype$gene_biotype[index]

## ----eval = FALSE--------------------------------------------------------
## table(GTEX_CRBL_BIOTYPE)
##
##           3prime_overlapping_ncRNA                          antisense                            lincRNA
##                                  8                                279                                 74
##                              miRNA                           misc_RNA                            Mt_rRNA
##                                  3                                  4                                  2
##                            Mt_tRNA               processed_pseudogene               processed_transcript
##                                  4                                119                                 37
##                     protein_coding                               rRNA                     sense_intronic
##                               4256                                  1                                 11
##                  sense_overlapping                             snoRNA                              snRNA
##                                 13                                  8                                  1
##                          TR_C_gene   transcribed_processed_pseudogene transcribed_unprocessed_pseudogene
##                                  1                                  8                                 25
##                 unitary_pseudogene             unprocessed_pseudogene
##                                  2                                 18
##

## ----warning = FALSE, message = FALSE, fig.width = 17, fig.height = 11, eval = TRUE----
# the preprocessed expression matrix with 1000 genes
str(cerebellum)

# a TRUE/FALSE vector with TRUE for protein-coding genes
str(pcgenes_crbl)

# STEP 1: Separate the protein-coding genes from the queries (lincRNAs)
crbl_matrix <- linc(cerebellum, codingGenes = pcgenes_crbl)

# STEP 2: Compute the co-expression network with a fixed threshold
crbl_cluster <- clusterlinc(crbl_matrix, pvalCutOff = 0.005)

# STEP 3: Interrogate enriched biological terms for co-expressed genes
crbl_bp <- getbio(crbl_cluster)

# Show the results as a plot!
plotlinc(crbl_bp)

## ---- message = FALSE----------------------------------------------------
getcoexpr(crbl_cluster, query = "55384")[1:5]

# The co-expressed genes can also be returned as gene symbols.
getcoexpr(crbl_cluster, query = "55384", keyType = 'SYMBOL')[1:5]

## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE----
meg3 <- singlelinc(crbl_matrix, query = "55384", onlycor = TRUE,
                   underth = FALSE, threshold = 0.5, ont = 'MF')
plotlinc(meg3)

## ----fig.width = 17, fig.height = 9.5, warning = FALSE, message = FALSE, eval = TRUE----
data(BRAIN_EXPR)
# (A) call 'linc' with no further arguments
# 'cerebellum' is a matrix of expression values; rows correspond to genes
# 'pcgenes_crbl' is a TRUE/FALSE vector; TRUE indicates a protein-coding gene
crbl_matrix <- linc(cerebellum, codingGenes = pcgenes_crbl)

# (B) remove first seven principal components
crbl_matrix_pca <- linc(cerebellum, codingGenes = pcgenes_crbl, rmPC = 1:7)

# (C) negative correlation by using 'userFun'
crbl_matrix_ncor <- linc(cerebellum, codingGenes = pcgenes_crbl,
                         userFun = function(x,y){ -cor(x,y) })

# (D) remove outliers using the ESD method
crbl_matrix_esd <- linc(cerebellum, codingGenes = pcgenes_crbl,
                        outlier = "esd")

# (E) plot this object
plotlinc(crbl_matrix_esd)

## ----eval = FALSE--------------------------------------------------------
## plotlinc(crbl_matrix)

## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE----
meg3_pca <- singlelinc(crbl_matrix_pca, query = "55384", onlycor = TRUE,
                       underth = FALSE, threshold = 0.5, ont = 'MF')
plotlinc(meg3_pca)

## ------------------------------------------------------------------------
# genes reported as co-expressed with the query both with and without
# the principal component removal
intersect(getcoexpr(meg3), getcoexpr(meg3_pca))

## ----warning = FALSE, eval = TRUE----------------------------------------
# results()     - results (different for subclasses)
# correlation() - correlation matrices
# assignment()  - vector of protein-coding genes
# history()     - stored parameters
# express()     - original expression matrix
cerebellum <- express(crbl_cluster)
str(cerebellum)

## ----warning = FALSE, eval = TRUE----------------------------------------
# getlinc() is used to access information
getlinc(crbl_cluster, subject = "queries")

## ----warning = FALSE, eval = TRUE----------------------------------------
# feature() can be used to convert objects
crbl_matrix <- crbl_cluster + feature(setLevel = "LINCmatrix",
                                      showLevels = FALSE)

## ----warning = FALSE, eval = TRUE----------------------------------------
# change the used gene annotation, here from "human" to "mouse"
murine_matrix <- changeOrgDb(crbl_matrix, OrgDb = 'org.Mm.eg.db')

## ----fig.width = 11, fig.height = 7, warning = FALSE, eval = TRUE--------
# scatterplots, correlations and expression level of query
plotlinc(crbl_cluster, showCor = c("647979", "6726", "3337", "3304", "3320"))

## ----eval = FALSE--------------------------------------------------------
## linctable(file_name = "crbl_co_expr", input = crbl_cluster)

## ----fig.width = 14, fig.height = 12, warning = FALSE, message = FALSE, eval = FALSE----
## justlinc(GTEX_LIVER_CRUDE) # 'justlinc' will search for the 10 best candidates
##
## my_lincRNAs <- c("ENSG00000197813") # This could also be a vector of ids
## res <- justlinc(GTEX_LIVER_CRUDE, targetGenes = my_lincRNAs) # 'justlinc' with one query

## ----include=FALSE, cache=FALSE------------------------------------------
data(BRAIN_EXPR)

## ----fig.width = 13, fig.height = 7.5, warning = FALSE, message = FALSE, eval = TRUE----
# apply the distance method "correlation" instead of "dicedist", the default
crbl_cluster_cor <- clusterlinc(crbl_matrix, distMethod = "correlation")
plotlinc(crbl_cluster_cor)

## ----fig.width = 13.5, fig.height = 6.5, warning = FALSE, fig.keep = 'last', eval = TRUE----
# add custom names and colors
gbm_cluster <- gbm_cluster + feature(customID = "CANCER_GBM",
                                     customCol = "red")
ctx_cluster <- ctx_cluster + feature(customID = "HEALTHY_CTX",
                                     customCol = "blue")
hpc_cluster <- hpc_cluster + feature(customID = "HEALTHY_HPC",
                                     customCol = "blue")
crbl_cluster <- crbl_cluster + feature(customID = "HEALTHY_CRBL",
                                       customCol = "blue")

# plot the dendrogram
norad <- querycluster('647979', queryTitle = 'NORAD',
                      gbm_cluster,  # Glioblastoma
                      ctx_cluster,  # Cortex
                      hpc_cluster,  # Hippocampus
                      crbl_cluster) # Cerebellum

neat1 <- querycluster('283131', queryTitle = 'NEAT1',
                      gbm_cluster,
                      ctx_cluster,
                      hpc_cluster,
                      crbl_cluster)

# show both query dendrograms side by side
grid.arrange(norad, neat1, ncol = 2)

## ----eval = FALSE--------------------------------------------------------
## # STEP 1: get the co-expressed genes
## norad <- lapply(list(gbm_cluster,
##                      ctx_cluster,
##                      hpc_cluster,
##                      crbl_cluster),
##                 function(x){
##                   getcoexpr(x, '647979')})
##
## # STEP 2: name the list
## names(norad) <- c("CANCER_GBM",
##                   "HEALTHY_CTX",
##                   "HEALTHY_HPC",
##                   "HEALTHY_CRBL")
##
## # STEP 3: enrichment analysis in 'clusterProfiler'
## library(clusterProfiler)
## norad_cluster <- compareCluster(norad,
##                                 fun = 'enrichGO',
##                                 OrgDb = 'org.Hs.eg.db',
##                                 ont = 'BP')
## plot(norad_cluster)

## ----eval = FALSE--------------------------------------------------------
## # WRAPPER
## justlinc()          # gene selection, co-expression
##
## # MAIN FUNCTIONS
## linc()              # cor. matrix and statistics
## clusterlinc()       # cluster and cor. test
## singlelinc()        # single query co-expression
##
## # PLOTTING FUNCTIONS
## plotlinc()          # main plotting function
## querycluster()      # one query in multiple data sets
##
## # HELPING FUNCTIONS
## getcoexpr()         # get the co-expressed genes
## getbio()            # enriched terms for a cluster
## object + feature()  # level and data labeling
## getlinc()           # subsetting of 'LINC' objects
## changeOrgDb()       # change organism
## linctable()         # write to table

## ----echo=FALSE, fig.width = 8, fig.height = 3.5, eval = TRUE------------
# read the overview image from the installed 'LINC' package and draw it
overview_img <- readPNG(system.file("extdata", "overview_img.png",
                                    package ="LINC"))
overview_plot <- rasterGrob(overview_img, interpolate = TRUE)
grid.arrange(overview_plot)