% Bibliography database: cross-study and meta-analysis methods for gene
% expression microarray data, plus the R software packages cited with them.
%
% Conventions used in this file:
%   - one field per line, brace-delimited values;
%   - `month` uses the predefined three-letter macros (sep, nov, may), unquoted;
%   - titles carry no trailing period (the bibliography style adds punctuation);
%   - braces in titles protect whole words whose capitalisation must survive
%     sentence-casing (package names, acronyms, proper nouns);
%   - `pmid` holds the PubMed identifier; it is not repeated inside `keywords`.

@article{Choi2007,
  author    = {Hyungwon Choi and Ronglai Shen and Arul Chinnaiyan and Debashis Ghosh},
  title     = {A Latent Variable Approach for Meta-Analysis of Gene Expression Data from Multiple Microarray Experiments},
  journal   = {BMC Bioinformatics},
  year      = {2007},
  month     = sep,
  volume    = {8},
  number    = {1},
  pages     = {364},
  doi       = {10.1186/1471-2105-8-364},
  url       = {http://dx.doi.org/10.1186/1471-2105-8-364},
  pmid      = {17900369},
  pii       = {1471-2105-8-364},
  abstract  = {BACKGROUND: With the explosion in data generated using microarray
               technology by different investigators working on similar
               experiments, it is of interest to combine results across
               multiple studies. RESULTS: In this article, we describe a
               general probabilistic framework for combining high-throughput
               genomic data from several related microarray experiments using
               mixture models. A key feature of the model is the use of latent
               variables that represent quantities that can be combined across
               diverse platforms. We consider two methods for estimation of an
               index termed the probability of expression (POE). The first,
               reported in previous work by the authors, involves Markov Chain
               Monte Carlo (MCMC) techniques. The second method is a faster
               algorithm based on the expectation-maximization (EM) algorithm.
               The methods are illustrated with application to a meta-analysis
               of datasets for metastatic cancer. CONCLUSIONS: The statistical
               methods described in the paper are available as an R package,
               metaArray 1.7.1, which is at Bioconductor, whose URL is
               http://www.bioconductor.org/.},
  owner     = {rscharpf},
  timestamp = {2007.12.05}
}

@article{Cope2004,
  author    = {Leslie Cope and Xiaogang Zhong and Elizabeth Garrett and Giovanni Parmigiani},
  title     = {{MergeMaid}: {R} tools for merging and cross-study validation of gene expression data},
  journal   = {Statistical Applications in Genetics and Molecular Biology},
  year      = {2004},
  volume    = {3},
  pages     = {Article29},
  doi       = {10.2202/1544-6115.1046},
  url       = {http://dx.doi.org/10.2202/1544-6115.1046},
  pmid      = {16646808},
  abstract  = {Cross-study validation of gene expression investigations is
               critical in genomic analysis. We developed an R package and
               associated object definitions to merge and visualize multiple
               gene expression datasets. Our merging functions use arbitrary
               character IDs and generate objects that can efficiently support
               a variety of joint analyses. Visualization tools support
               exploration and cross-study validation of the data, without
               requiring normalization across platforms. Tools include
               ``integrative correlation'' plots that is, scatterplots of all
               pairwise correlations in one study against the corresponding
               pairwise correlations of another, both for individual genes and
               all genes combined. Gene-specific plots can be used to identify
               genes whose changes are reliably measured across studies.
               Visualizations also include scatterplots of gene-specific
               statistics quantifying relationships between expression and
               phenotypes of interest, using linear, logistic and Cox
               regression.},
  owner     = {rscharpf},
  timestamp = {2007.01.05}
}

@article{garb:etal:2001,
  author    = {Mitchell E. Garber and Olga G. Troyanskaya and Karsten Schluens and Simone Petersen and Zsuzsanna Thaesler and Manuela Pacyna-Gengelbach and Matt van de Rijn and Glenn D. Rosen and Charles M. Perou and Richard I. Whyte and Russ B. Altman and Patrick O. Brown and David Botstein and Iver Petersen},
  title     = {Diversity of gene expression in adenocarcinoma of the lung},
  journal   = {Proceedings of the National Academy of Sciences USA},
  year      = {2001},
  volume    = {98},
  pages     = {13784--13789}
}

% NOTE(review): Garrett-Mayer2007 has no volume/number/pages recorded --
% presumably entered from an advance-access record; confirm and complete.
@article{Garrett-Mayer2007,
  author    = {Elizabeth Garrett-Mayer and Giovanni Parmigiani and Xiaogang Zhong and Leslie Cope and Edward Gabrielson},
  title     = {Cross-study validation and combined analysis of gene expression microarray data},
  journal   = {Biostatistics},
  year      = {2007},
  month     = sep,
  doi       = {10.1093/biostatistics/kxm033},
  url       = {http://dx.doi.org/10.1093/biostatistics/kxm033},
  pmid      = {17873151},
  pii       = {kxm033},
  abstract  = {Investigations of transcript levels on a genomic scale using
               hybridization-based arrays have led to formidable advances in
               our understanding of the biology of many human illnesses. At the
               same time, these investigations have generated controversy
               because of the probabilistic nature of the conclusions and the
               surfacing of noticeable discrepancies between the results of
               studies addressing the same biological question. In this
               article, we present simple and effective data analysis and
               visualization tools for gauging the degree to which the findings
               of one study are reproduced by others and for integrating
               multiple studies in a single analysis. We describe these
               approaches in the context of studies of breast cancer and
               illustrate that it is possible to identify a substantial
               biologically relevant subset of the human genome within which
               hybridization results are reliable. The subset generally varies
               with the platforms used, the tissues studied, and the
               populations being sampled. Despite important differences, it is
               also possible to develop simple expression measures that allow
               comparison across platforms, studies, laboratories and
               populations. Important biological signals are often preserved or
               enhanced. Cross-study validation and combination of microarray
               results requires careful, but not overly complex, statistical
               thinking and can become a routine component of genomic
               analysis.},
  owner     = {gp},
  timestamp = {2007.09.29}
}

@article{Gentleman2004,
  author    = {Robert C Gentleman and Vincent J Carey and Douglas M Bates and Ben Bolstad and Marcel Dettling and Sandrine Dudoit and Byron Ellis and Laurent Gautier and Yongchao Ge and Jeff Gentry and Kurt Hornik and Torsten Hothorn and Wolfgang Huber and Stefano Iacus and Rafael Irizarry and Friedrich Leisch and Cheng Li and Martin Maechler and Anthony J Rossini and Gunther Sawitzki and Colin Smith and Gordon Smyth and Luke Tierney and Jean Y H Yang and Jianhua Zhang},
  title     = {{Bioconductor}: open software development for computational biology and bioinformatics},
  journal   = {Genome Biology},
  year      = {2004},
  volume    = {5},
  number    = {10},
  pages     = {R80},
  doi       = {10.1186/gb-2004-5-10-r80},
  url       = {http://dx.doi.org/10.1186/gb-2004-5-10-r80},
  pmid      = {15461798},
  pii       = {gb-2004-5-10-r80},
  keywords  = {Computational Biology, Internet, Reproducibility of Results, Software},
  abstract  = {The Bioconductor project is an initiative for the collaborative
               creation of extensible software for computational biology and
               bioinformatics. The goals of the project include: fostering
               collaborative development and widespread use of innovative
               software, reducing barriers to entry into interdisciplinary
               scientific research, and promoting the achievement of remote
               reproducibility of research results. We describe details of our
               aims and methods, identify current challenges, compare
               Bioconductor to other open bioinformatics projects, and provide
               working examples.},
  owner     = {rscharpf},
  timestamp = {2007.01.09}
}

@article{Hong2006,
  author    = {Fangxin Hong and Rainer Breitling and Connor W McEntee and Ben S Wittner and Jennifer L Nemhauser and Joanne Chory},
  title     = {{RankProd}: a {Bioconductor} package for detecting differentially expressed genes in meta-analysis},
  journal   = {Bioinformatics},
  year      = {2006},
  month     = nov,
  volume    = {22},
  number    = {22},
  pages     = {2825--2827},
  doi       = {10.1093/bioinformatics/btl476},
  url       = {http://dx.doi.org/10.1093/bioinformatics/btl476},
  pmid      = {16982708},
  pii       = {btl476},
  keywords  = {Computational Biology; DNA, Complementary; Data Interpretation, Statistical; False Positive Reactions; Gene Expression Profiling; Gene Expression Regulation; Internet; Meta-Analysis as Topic; Metabolism; Models, Statistical; Oligonucleotide Array Sequence Analysis; Plant Proteins; Proteomics; Reproducibility of Results; Software},
  abstract  = {While meta-analysis provides a powerful tool for analyzing
               microarray experiments by combining data from multiple studies,
               it presents unique computational challenges. The Bioconductor
               package RankProd provides a new and intuitive tool for this
               purpose in detecting differentially expressed genes under two
               experimental conditions. The package modifies and extends the
               rank product method proposed by Breitling et al., [(2004) FEBS
               Lett., 573, 83-92] to integrate multiple microarray studies from
               different laboratories and/or platforms. It offers several
               advantages over t-test based methods and accepts pre-processed
               expression datasets produced from a wide variety of platforms.
               The significance of the detection is assessed by a
               non-parametric permutation test, and the associated P-value and
               false discovery rate (FDR) are included in the output alongside
               the genes that are detected by user-defined criteria. A
               visualization plot is provided to view actual expression levels
               for each gene with estimated significance measurements.
               AVAILABILITY: RankProd is available at Bioconductor
               http://www.bioconductor.org. A web-based interface will soon be
               available at http://cactus.salk.edu/RankProd},
  owner     = {rscharpf},
  timestamp = {2007.12.05}
}

@manual{Lusa2007,
  author    = {Lara Lusa and R. Gentleman and M. Ruschhaupt},
  title     = {{GeneMeta}: {MetaAnalysis} for High Throughput Experiments},
  year      = {2007},
  note      = {R package version 1.11.0}
}

% NOTE(review): Parmigiani2004 records a single page, 103S, rather than a
% range -- verify against the journal issue.
@article{Parmigiani2004,
  author    = {Giovanni Parmigiani and Elizabeth Garrett and Ramaswamy Anbazhagan and Edward Gabrielson},
  title     = {Molecular classification of lung cancer: a cross-platform comparison of gene expression data sets},
  journal   = {Chest},
  year      = {2004},
  month     = may,
  volume    = {125},
  number    = {5 Suppl},
  pages     = {103S},
  pmid      = {15136439},
  keywords  = {Adenocarcinoma, Carcinoma, Gene Expression, Gene Expression Profiling, Humans, Lung Neoplasms, Proportional Hazards Models, Squamous Cell},
  owner     = {rscharpf},
  timestamp = {2007.03.07}
}

@manual{Plummer2007,
  author    = {Martyn Plummer and Nicky Best and Kate Cowles and Karen Vines},
  title     = {{coda}: Output analysis and diagnostics for {MCMC}},
  year      = {2007},
  note      = {R package version 0.12-1}
}

% NOTE(review): Scharpf2009 is still marked "To appear" with no
% volume/number/pages -- update once the published details are confirmed.
@article{Scharpf2009,
  author    = {Scharpf, Robert B. and Tjelmeland, H{\aa}kon and Parmigiani, Giovanni and Nobel, Andrew},
  title     = {A {Bayesian} model for cross-study differential gene expression},
  journal   = {Journal of the American Statistical Association},
  year      = {2009},
  note      = {To appear}
}

@article{Parmigiani2004a,
  author    = {Giovanni Parmigiani and Elizabeth S Garrett-Mayer and Ramaswamy Anbazhagan and Edward Gabrielson},
  title     = {A cross-study comparison of gene expression studies for the molecular classification of lung cancer},
  journal   = {Clinical Cancer Research},
  year      = {2004},
  month     = may,
  volume    = {10},
  number    = {9},
  pages     = {2922--2927},
  pmid      = {15131026},
  keywords  = {Adenocarcinoma, Carcinoma, Diagnosis, Differential, Gene Expression Profiling, Gene Expression Regulation, Humans, Lung Neoplasms, Neoplastic, Reproducibility of Results, Squamous Cell},
  abstract  = {PURPOSE: Recent studies sought to refine lung cancer
               classification using gene expression microarrays. We evaluate
               the extent to which these studies agree and whether results can
               be integrated. EXPERIMENTAL DESIGN: We developed a practical
               analysis plan for cross-study comparison, validation, and
               integration of cancer molecular classification studies using
               public data. We evaluated genes for cross-platform consistency
               of expression patterns, using integrative correlations, which
               quantify cross-study reproducibility without relying on direct
               assimilation of expression measurements across platforms. We
               then compared associations of gene expression levels to
               differential diagnosis of squamous cell carcinoma versus
               adenocarcinoma via reproducibility of the gene-specific t
               statistics and to survival via reproducibility of Cox
               coefficients. RESULTS: Integrative correlation analysis revealed
               a large proportion of genes in which the patterns agreed across
               studies more than would be expected by chance. Correlation of t
               statistics for diagnosis of squamous cell carcinoma versus
               adenocarcinoma is high (0.85) and increases (0.925) when using
               only the most consistent genes identified by integrative
               correlation. Correlations of Cox coefficients ranged from 0.13
               to 0.31 (0.33-0.49 with genes selected for consistency).
               Although we find genes that are significant in multiple studies
               but show discordant effects, their number is approximately that
               expected by chance. We report genes that are reproducible by
               integrative analysis, significant in all studies, and concordant
               in effect. CONCLUSIONS: Cross-study comparison revealed
               significant, albeit incomplete, agreement of gene expression
               patterns related to lung cancer biology and identified genes
               that reproducibly predict outcomes. This analysis approach is
               broadly applicable to cross-study comparisons of gene expression
               profiling projects.},
  owner     = {rscharpf},
  timestamp = {2007.03.07}
}