This file contains the code for the following plots (NCI-60 data), which were part of Meng et al. (2014) [citation key: meng_multivariate_2014].
Load the workspace; this may take several seconds.
# Alternative remote source for the workspace file (disabled):
# load(url("ftp://129.187.44.58/share/chen/ToAedin/mcoin_NCI60.RDa"))
# Restore the saved workspace from the current working directory.
# NOTE(review): presumably this is what provides the `mcoin` object used by
# the plotting code below - confirm.
load(file = "mcoin_NCI60.RDa")
This plot shows the co-structure between the projections of the cell line data.
Figure 1 shows the first two axes of MCIA, representing the transcriptomic and proteomic datasets of the NCI-60 panel. Different shapes represent the respective platforms and are connected by lines, where the length of each line is proportional to the divergence between the data from the same cell line. Lines are joined by a common point, representing the reference structure which maximizes covariance derived from the MCIA synthetic analysis. Colors represent the NCI-60 cell lines from nine different tissues. The epithelial and mesenchymal features are separated along the first axis (PC1, horizontal). Melanoma and leukemia cell lines were projected on the negative side of the second axis (PC2, vertical).
# Figure 1 (sample space): the first two columns of mcoin$mcoa$Tl1 are drawn
# as points and joined by segments to the first two columns of
# mcoin$mcoa$SynVar.
temp <- rownames(mcoin$mcoa$Tl1)
# Row names are split on "."; field 1 is used as the tissue code and field 3
# as the dataset index. The split is done once and shared by `col` and `pch`.
name_fields <- strsplit(temp, split = "\\.")

# Tissue code -> plotting colour.
# unique(col) - "BR" "CNS" "CO" "LE" "ME" "LC" "OV" "PR" "RE"
tissue_colours <- c(
  BR = "blue", CNS = "yellow4", CO = "red", LE = "cyan3", ME = "brown",
  LC = "antiquewhite4", OV = "orange", PR = "magenta", RE = "black"
)
col <- vapply(name_fields, function(x) x[1], character(1))
# Codes that are not in the table are left untouched.
known_tissue <- col %in% names(tissue_colours)
col[known_tissue] <- unname(tissue_colours[col[known_tissue]])

# Dataset index -> plotting symbol.
# names(mcoin$coa) - "agilent" "hgu95" "hgu133" "hgu133p2"
dataset_symbols <- c("1" = 1, "2" = 19, "3" = 15, "4" = 17)
pch <- vapply(name_fields, function(x) x[3], character(1))
# Indices that are not in the table keep their own value and are simply
# converted to a number below.
known_dataset <- pch %in% names(dataset_symbols)
pch[known_dataset] <- dataset_symbols[pch[known_dataset]]
pch <- as.numeric(pch)

# tiff("Plots/Sample_space.tiff", units="mm", width=178, height=178, res=300)
par(mar = c(0.5, 0.5, 0.5, 0.5), font = 2, cex = 1)
# Empty frame (col = NA) with fixed limits; the content is layered on top.
plot(mcoin$mcoa$Tl1[, 1:2], axes = FALSE, frame.plot = FALSE, col = NA,
     xlab = "", ylab = "", xlim = c(-2.5, 2.4), ylim = c(-3, 1.5))
abline(v = 0, h = 0)
# NOTE(review): presumably SynVar has one row per cell line and is recycled
# across the datasets stacked in Tl1 - confirm the row order matches.
segments(mcoin$mcoa$Tl1[, 1], mcoin$mcoa$Tl1[, 2],
         mcoin$mcoa$SynVar[, 1], mcoin$mcoa$SynVar[, 2], col = col)
# xlab/ylab are not graphical parameters of points() and only raised warnings,
# so they are no longer passed here.
points(mcoin$mcoa$Tl1[, 1:2], pch = pch, col = col)

#The following code will add the labels to the plot
# Tissue labels, drawn in the bold font set by par(font = 2) above.
text(
  x = c(1.6, -2, 2, -0.5, -0.8, 0, 0.4, 1, 1.9),
  y = c(1.5, -0.3, -0.7, -1.2, 1.4, 1.45, 1.2, -0.4, 0),
  labels = c("Colon", "CNS", "Leukaemia", "Melanoma", "Renal", "Ovarian",
             "Prostate", "NSCLC", "Breast"),
  col = unname(tissue_colours[c("CO", "CNS", "LE", "ME", "RE", "OV", "PR",
                                "LC", "BR")])
)
# Individual cell-line labels, in the plain font.
text(
  x = c(-0.5, 1.35, 1.1, -0.45, -1.1, -1.7),
  y = c(-0.8, -0.15, 0.7, 0.2, 0.55, 0.45),
  labels = c("LOXIMVI", "MCF7", "T47D", "MDAMD231", "BT549", "HS587T"),
  col = c("brown", "blue", "blue", "blue", "blue", "blue"),
  font = 1
)
# Axis names, in italics.
text(x = 2.5, y = -0.1, labels = "PC1", col = "black", font = 3)
# NOTE(review): y = 2 lies above the upper ylim of 1.5 requested above, so
# this label may be clipped or fall off the device - confirm the position.
text(x = 0.2, y = 2, labels = "PC2", col = "black", font = 3)
axis(side = 1)
axis(side = 2)
Figure 1B summarizes the concordance between platforms by representing the pseudo-eigenvalue space of the NCI-60 datasets. The pseudo-eigenvalue space represents the overall co-structure between datasets and shows which platform contributes more to the total variance.
# Figure 1B (dataset space): the first two columns of mcoin$mcoa$cov2, one
# point per dataset, with a legend placed in the right-hand margin.
# tiff("Plots/Dataset_space.tiff", units="mm", width=89, height=60, res=600)
# The wide right margin and xpd = TRUE leave room for the legend, which is
# anchored at the right edge of the x range.
par(mar = c(4, 4, 1, 6.5), font = 1, cex = 1, xpd = TRUE, bg = "white")
# Symbols and names are defined once so the points and the legend cannot
# drift apart.
# NOTE(review): four symbols are supplied; if cov2 has more than four rows
# they are recycled - confirm the number of datasets.
platform_pch <- c(1, 19, 15, 17)
platform_names <- c("Agilent", "HGU95", "HGU133", "HGU133p2")
pseudoeig_xy <- mcoin$mcoa$cov2[, 1:2]
# Empty frame (col = NA) with fixed limits; points and axes are added below.
plot(pseudoeig_xy, axes = FALSE, frame.plot = FALSE, col = NA,
     xlab = "pseudoeig 1", ylab = "pseudoeig 2",
     xlim = c(0.115, 0.135), ylim = c(0.09, 0.11))
points(pseudoeig_xy, pch = platform_pch, col = "black", cex = 1.5)
# Axes are pushed slightly outwards (line = 0.6) from the plot region.
axis(side = 1, at = seq(0.115, 0.135, by = 0.005), line = 0.6)
axis(side = 2, at = seq(0.09, 0.11, by = 0.005), line = 0.6)
legend(x = 0.135, y = 0.11, pch = platform_pch, legend = platform_names,
       box.col = NA, cex = 1)
# dev.off()
This figure wasn’t included, as we plotted the features from each dataset separately; however, you can easily plot them in the same space.
# Combined feature space: the rows of mcoin$mcoa$Tco for datasets "1" to "4"
# drawn in one plot, each dataset with its own symbol and colour.
# tiff("Plots/Mol_space1.tiff", units="mm", width=84, height=84, res=600)
par(mar = c(0.5, 0.5, 0.5, 0.5), font = 2, cex = 1)
# `ind` labels each row of Tco with its dataset; it is compared against the
# dataset labels "1" to "4" below.
ind <- mcoin$mcoa$TC$"T"
agi <- mcoin$mcoa$Tco[ind %in% "1", 1:2]
h95 <- mcoin$mcoa$Tco[ind %in% "2", 1:2]
h133 <- mcoin$mcoa$Tco[ind %in% "3", 1:2]
hp2 <- mcoin$mcoa$Tco[ind %in% "4", 1:2]
# Empty frame (col = NA) sized from all features so every dataset fits.
plot(mcoin$mcoa$Tco[, 1:2], axes = FALSE, frame.plot = FALSE,
     ylim = c(-4, 3.2), col = NA, xlab = "", ylab = "")
# The par() call above does not set xpd, so a value left over from an earlier
# plot on the same device (e.g. xpd = TRUE) would let the grid spill into the
# margins. Clipping is therefore requested on the lines themselves.
abline(v = seq(-10, 10, by = 1), h = seq(-10, 10, by = 1), col = "gray75",
       xpd = FALSE)
abline(v = 0, h = 0, xpd = FALSE)
points(agi, pch = 1, col = "black", cex = 0.5)
points(h95, pch = 19, col = "blue", cex = 0.5)
points(h133, pch = 15, col = "cyan2", cex = 0.5)
points(hp2, pch = 17, col = "orange", cex = 0.5)
# "d=1" documents the spacing of the grid lines (drawn every 1 unit above).
text(3.4, 3.1, "d=1")
text(3.3, 0.3, "PC1", font = 3)
text(-0.5, 3.1, "PC2", font = 3)
# dev.off()
Figure 2 shows the projection of the respective cell lines from the NCI-60. Colors represent tissue types as in Figure 1. (B-E) represent the coordinates of genes in the transcriptomic data and (F) shows proteins from the proteomics dataset. The top genes/proteins at the end of each MCIA axis are labeled in red, indicating that those features were present in at least four platforms and located in the same direction from the origin.
# Figure 2 feature panels: one panel per dataset ("1" to "5" in
# mcoin$mcoa$TC$"T"), all drawn in the same coordinate frame.
# tiff("Plots/Mol_space2.tiff", units="mm", width=178, height=178, res=600)
par(mar = c(0.5, 0.5, 0.5, 0.5), font = 2, cex = 1, xpd = FALSE)
ind <- mcoin$mcoa$TC$"T"
agi <- mcoin$mcoa$Tco[ind %in% "1", 1:2]
h95 <- mcoin$mcoa$Tco[ind %in% "2", 1:2]
h133 <- mcoin$mcoa$Tco[ind %in% "3", 1:2]
hp2 <- mcoin$mcoa$Tco[ind %in% "4", 1:2]
prot <- mcoin$mcoa$Tco[ind %in% "5", 1:2]

# Draw one feature-space panel.
#   all_xy     coordinates of every feature; only used to size the empty frame
#              so that all panels share the same x range
#   panel_xy   coordinates of the features shown in this panel
#   pch, col   symbol and colour of the points
#   label      dataset name written in the lower-left corner
#   show_scale if TRUE, also write the grid spacing ("d=1")
draw_feature_panel <- function(all_xy, panel_xy, pch, col, label,
                               show_scale = FALSE) {
  plot(all_xy, axes = FALSE, frame.plot = FALSE, ylim = c(-4, 3.2), col = NA,
       xlab = "", ylab = "")
  # Unit grid first, then the two axes through the origin on top of it.
  abline(v = seq(-10, 10, by = 1), h = seq(-10, 10, by = 1), col = "gray75")
  abline(v = 0, h = 0)
  points(panel_xy, pch = pch, col = col, cex = 0.5)
  box()
  text(3, 0.3, "PC1", font = 3)
  text(-0.8, 3.1, "PC2", font = 3)
  text(x = -2.5, y = -4, labels = label)
  if (show_scale) {
    text(x = 3, y = 3, labels = "d=1")
  }
  invisible(NULL)
}

feature_xy <- mcoin$mcoa$Tco[, 1:2]
# 2 x 3 grid filled column by column; five of the six cells are used here.
layout(matrix(1:6, 2, 3))
draw_feature_panel(feature_xy, agi, pch = 20, col = "gray25",
                   label = "Agilent", show_scale = TRUE)
draw_feature_panel(feature_xy, h95, pch = 19, col = "blue",
                   label = "Affy HGU95")
draw_feature_panel(feature_xy, h133, pch = 15, col = "cyan2",
                   label = "Affy HGU133")
draw_feature_panel(feature_xy, hp2, pch = 17, col = "orange",
                   label = "Affy HGU133plus2")
# the protein space
draw_feature_panel(feature_xy, prot, pch = 17, col = "black",
                   label = "Proteome")
# Return to a single-panel layout so that a later plot on the same device is
# not squeezed into the remaining empty cell of the grid.
layout(1)
# dev.off()
Figure S5: The heatmap shows the RV coefficients between each pair of normalized datasets, indicating a high degree of overall similarity in the structure of the transcriptomic and proteomic datasets.
# Figure S5: heatmap of the pairwise RV coefficients stored in mcoin$mcoa$RV.
RV <- mcoin$mcoa$RV
RV <- round(RV, digits = 2)
# Blank out the lower triangle so each pair of datasets is shown only once.
RV[lower.tri(RV)] <- NA
# png(filename="NCI60_heatmap.png", width=120, height=90, units="mm", res=100)
# pheatmap is called through its namespace because this script never attaches
# the package; rows and columns keep their original order (no clustering).
pheatmap::pheatmap(RV, cluster_rows = FALSE, cluster_cols = FALSE,
                   display_numbers = TRUE, border_color = "white",
                   cellwidth = 30, cellheight = 30)
# dev.off()
Figure S4: Pseudo-eigenvalues of the NCI-60 data, including four microarray datasets and one proteomic dataset. Each pseudo-eigenvalue is associated with a principal component (linearly uncorrelated variables defined by MCIA), indicating the variance explained by each PC. The barplot shows the pseudo-eigenvalues (left axis) and the blue line corresponds to the percentage of variance of each PC, calculated as the eigenvalue divided by the sum of all eigenvalues. The first three PCs represent 17.6%, 14.2% and 9.7% of the total inertia.
# Figure S4: bars for the first ten pseudo-eigenvalues (left axis), overlaid
# with each one's share of the sum of all pseudo-eigenvalues (right axis).
# tiff("Plots/NCI60_Eig.tiff", units="mm", width=84, height=84, res=200)
par(mar = c(2, 3.5, 1, 3.5), font = 2, cex = 1, xpd = TRUE, bg = "white",
    lwd = 1, col.axis = 1, col.lab = 1)
ppp <- mcoin$mcoa$pseudoeig[1:10]
# Shares are computed over all pseudo-eigenvalues, then cut to the first ten.
pct <- (mcoin$mcoa$pseudoeig / sum(mcoin$mcoa$pseudoeig))[1:10]
# barplot() returns the bar midpoints, so a single call both draws the bars
# and provides the x positions used for the axis labels and the overlay.
bp <- barplot(ppp, axes = FALSE, xlab = NA, ylab = NA)
axis(side = 1, at = bp, labels = 1:10, line = -1, col.ticks = NA, lwd = 0)
axis(side = 2, lwd = 2)
# Draw the percentage curve on top of the bars without clearing the device.
par(new = TRUE)
# xlim = c(0.2, 12) is the x range barplot() uses for ten bars with the
# default width and spacing, so the points line up with the bar midpoints.
plot(bp, pct, ylim = c(0, 0.2), axes = FALSE, frame.plot = FALSE, col = 4,
     pch = 20, xlab = NA, ylab = NA, xlim = c(0.2, 12))
lines(bp, pct, col = 4)
# The right-hand axis is drawn in colour 4 to match the percentage curve and
# is labelled in percent.
par(col.axis = 4, col.lab = 4, font = 2)
axis(side = 4, col = 4, lwd = 2, at = c(0, 0.05, 0.1, 0.15, 0.2),
     labels = c(0, 5, 10, 15, 20))
mtext(side = 1, "PCs", line = 1)
mtext(side = 2, "Eigenvalue", line = 2)
mtext(side = 4, "% of eigenvalue", line = 2, col = 4)
# dev.off()