## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ## ----eval=FALSE--------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("ath1121501frmavecs") ## ----------------------------------------------------------------------------- library(ath1121501frmavecs) library(GEOquery) library(affy) library(frma) # 1. Download the CEL files GSM_considered <- c("GSM433634", "GSM1179807", "GSM433644") for (sample in GSM_considered) { getGEOSuppFiles( sample, makeDirectory = FALSE, baseDir = tempdir(), filter_regex = "cel.gz || CEL.gz", fetch_files = TRUE ) } # 2. Obtain the CEL file paths celpaths <- grepl( pattern = paste0(c( ".*(", paste0(GSM_considered, collapse = "|"), ").*" ), collapse = ""), x = list.files(tempdir()), ignore.case = TRUE ) celpaths <- list.files(tempdir())[celpaths] celpaths <- celpaths[grepl(pattern = "cel.gz", celpaths, ignore.case = TRUE)] celpaths <- file.path(tempdir(), celpaths) # 3. Load and preprocess the data cel_batch <- read.affybatch(celpaths) data(ath1121501frmavecs) cel_frma <- frma( object = cel_batch, target = "full", input.vec = ath1121501frmavecs, verbose = TRUE ) # 4. Extract the fRMA values cel_e <- exprs(cel_frma) cel_e <- as.data.frame(cel_e) head(cel_e) ## ----------------------------------------------------------------------------- library(ath1121501.db) probeset2gene <- mapIds( ath1121501.db, keys = rownames(cel_e), column = "TAIR", keytype = "PROBEID" ) #cel_e <- cel_e[rownames(cel_e) %in% probeset2gene$array_element_name,] cel_e$AGI <- probeset2gene[match(x = rownames(cel_e), table = names(probeset2gene))] multiple_ps <- table(cel_e$AGI) multiple_ps <- names(multiple_ps[multiple_ps > 1]) todel_ps <- list() for (gene in multiple_ps) { considered <- cel_e[cel_e$AGI == gene, ] mu_considered <- apply(as.matrix(considered[, seq(1, ncol(considered) - 1)]), 1, mean) mu_considered <- mu_considered[order(mu_considered, decreasing = TRUE)] todel_ps[[gene]] <- names(mu_considered)[2:length(mu_considered)] } todel_ps <- do.call(c, todel_ps) cel_e <- cel_e[!(rownames(cel_e) %in% todel_ps), ] ## ----------------------------------------------------------------------------- sessionInfo()