### R code from vignette source 'TFARM.Rnw' ################################################### ### code chunk number 1: style ################################################### BiocStyle::latex() ################################################### ### code chunk number 2: options ################################################### options(width=90) ################################################### ### code chunk number 3: preliminaries ################################################### library(TFARM) library(GenomicRanges) ################################################### ### code chunk number 4: TFARM.Rnw:75-81 ################################################### # Load and visualize the dataset: data("MCF7_chr1") length(MCF7_chr1) MCF7_chr1 ################################################### ### code chunk number 5: TFARM.Rnw:135-148 ################################################### # Coming back to the example on the transcription factors of cell line MCF-7, # in the promotorial regions of chromosome 1. # Suppose that the user wants to find the most relevant association rules for the # prediction of the presence of the transcription factor TEAD4 and such that the # left-hand-side of the rules contains only present transcription factors. # This means extracting all the association rules with right-hand-side equal to # {TEAD4=1} setting the parameter type = TRUE; the minimun support and minimum # confidence thresholds are set, as an example, to 0.005 and 0.62, respectively: r_TEAD4 <- rulesGen(MCF7_chr1, "TEAD4=1", 0.005, 0.62, TRUE) dim(r_TEAD4) head(r_TEAD4) ################################################### ### code chunk number 6: TFARM.Rnw:165-183 ################################################### # Transcription factors present in at least one of the regions in the considered dataset: c <- names(mcols(MCF7_chr1)) c lc <- length(c) names(presAbs(c, r_TEAD4, TRUE)) # Transcription factors present in at least one of the association rules: p_TFs <- presAbs(c, r_TEAD4, TRUE)$pres p_TFs # Transcription factors absent in all the association rules: a <- presAbs(c[1:lc], r_TEAD4, TRUE)$abs a ################################################### ### code chunk number 7: TFARM.Rnw:308-314 ################################################### # To find the subset of rules containing the transcription factor FOSL2: r_FOSL2 <- rulesTF(TFi = 'FOSL2=1', rules = r_TEAD4, verbose = TRUE) head(r_FOSL2) dim(r_FOSL2)[1] ################################################### ### code chunk number 8: TFARM.Rnw:318-323 ################################################### # If none of the rules in input to rulesTF contains the given item TFi, # and verbose = TRUE, a warnig message is reported to the user: r_CTCF <- rulesTF(TFi = 'CTCF=1', rules = r_TEAD4, verbose = TRUE) ################################################### ### code chunk number 9: TFARM.Rnw:338-342 ################################################### # For example to evaluate FOSL2 importance in the set of rules r_FOSL2: r_noFOSL2 <- rulesTF0('FOSL2=1', r_FOSL2, r_TEAD4, MCF7_chr1, "TEAD4=1") ################################################### ### code chunk number 10: TFARM.Rnw:345-347 ################################################### head(r_noFOSL2) ################################################### ### code chunk number 11: IComp ################################################### # Perform the IComp function to compute the Importance Index distribution: imp_FOSL2 <- IComp('FOSL2=1', r_FOSL2, r_noFOSL2, figures=TRUE) names(imp_FOSL2) imp_FOSL2$imp head(imp_FOSL2$delta) head(imp_FOSL2$rwi) head(imp_FOSL2$rwo) ################################################### ### code chunk number 12: TFARM.Rnw:390-412 ################################################### # For the considered example the user could run: all <- lapply(p_TFs, function(pi) { A <- rulesTF(pi, r_TEAD4, FALSE) B <- rulesTF0(pi, A, r_TEAD4, MCF7_chr1, "TEAD4=1") IComp(pi, A, B, figures=FALSE) }) for (i in 1:length(p_TFs)) { IMP_Z[[i]] <- all[[i]]$imp # Extract the delta variations of support, confidence and lift: DELTA[[i]] <- all[[i]]$delta } IMP <- data.frame( TF = p_TFs, imp = sapply(IMP_Z, mean), sd = sapply(IMP_Z, sd), nrules = sapply(IMP_Z, length), stringsAsFactors=FALSE ) ################################################### ### code chunk number 13: TFARM.Rnw:415-422 ################################################### # Sort by imp column of IMP library(plyr) IMP.ord <- arrange(IMP, desc(imp)) IMP.ord ################################################### ### code chunk number 14: IPCA ################################################### # Select the candidate co-regulators and the number of rules associated with them, then # perform the Principal Component Analysis: colnames(IMP) TF_Imp <- data.frame(IMP$TF, IMP$imp, IMP$nrules) i.pc <- IPCA(DELTA, TF_Imp) names(i.pc) i.pc$summary head(i.pc$loadings) head(i.pc$scores) ################################################### ### code chunk number 15: distribViz ################################################### # Considering for example the candidate co-regulator transcription factors # found in the set of rules r_TEAD4: distribViz(IMP_Z, p_TFs) ################################################### ### code chunk number 16: TFARM.Rnw:531-552 ################################################### # Select the index of the list of importances IMP_Z # containing importance distributions of transcription factor ZNF217 ZNF217_index <- which(p_TFs == 'ZNF217=1') # select outlier rules where ZNF217 has importance greater than 5 o <- IMP_Z[[ZNF217_index]] > 5 rule_o <- all[[ZNF217_index]]$rwi[o,] rule_o # So, ZNF217 is very relevant in the pattern of transcription factors # {FOSL2=1,GABPA=1,MYC=1,ZNF217=1} # for the prediction of the presence of TEAD4. # To extract support, confidence and lift of the corresponding rule without ZNF217: all[[ZNF217_index]]$rwo[o,] # Since the measure of the rule obtained removing ZNF217 is equal to zero, # the rule {FOSL2=1,GABPA=1,MYC=1,ZNF217=0} -> {TEAD4=1}, # obtained removing ZNF217, is found in the relevant rules for the prediction # of the presence of TEAD4. ################################################### ### code chunk number 17: TFARM.Rnw:584-592 ################################################### # Construct couples as a vector in which all possible combinations of # transcription factors (present in at least one association rules) # are included: couples_0 <- combn(p_TFs, 2) couples <- paste(couples_0[1,], couples_0[2,], sep=',') head(couples) ################################################### ### code chunk number 18: TFARM.Rnw:595-623 ################################################### # The evaluation of the mean Importance Index of each pair is # then computed similarly as previously done for single transcription factors: # Compute rulesTF, rulesTF0 and IComp for each pair, avoiding pairs not # found in the r_TEAD4 set of rules IMP_c <- lapply(couples, function(ci) { A_c <- rulesTF(ci, r_TEAD4, FALSE) if (all(!is.na(A_c[[1]][1]))){ B_c <- rulesTF0(ci, A_c, r_TEAD4, MCF7_chr1, "TEAD4=1") IComp(ci, A_c, B_c, figures=FALSE)$imp } }) # Delete all NULL elements and compute the mean Importance Index of each pair I_c <- matrix(0, length(couples), 2) I_c <- data.frame(I_c) I_c[,1] <- paste(couples) null.indexes <- vapply(IMP_c, is.null, numeric(1)) IMP_c <- IMP_c[!null.indexes] I_c <- I_c[!null.indexes,] I_c[,2] <- vapply(IMP_c, mean, numeric(1)) colnames(I_c) <- colnames(IMP[,1:2]) ################################################### ### code chunk number 19: TFARM.Rnw:626-632 ################################################### # Select rows with mean Importance Index different from NaN, then order I_c: I_c <- I_c[!is.na(I_c[,2]),] I_c_ord <- arrange(I_c, desc(imp)) head(I_c_ord) ################################################### ### code chunk number 20: heatmap ################################################### # Construction of a vector in which mean Importance Index values of pairs # of transcription factors are represented. # These transcription factors are taken from the output of presAbs as # present in at least one association rules. # The function rbind is used to combine IMP columns and I_c_ord columns and # then the function arrange orders the data frame by the imp column. I_c_2 <- arrange(rbind(IMP[,1:2], I_c_ord), desc(imp)) i.heat <- heatI(p_TFs, I_c_2)