////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This program is free software; you can redistribute it and/or modify 
// it under the terms of the GNU General Public License (version 2) as 
// published by the Free Software Foundation.
// 
// This program is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License 
// along with this program; if not, write to the 
// 
// Free Software Foundation, Inc., 
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
////////////////////////////////////////////////////////////////

#include <assert.h>
#include <fstream>
#include <iostream>
#include <iomanip>
#include <istream>
#include <math.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include "SnpData.h"

//////////////////////////////////////////////////////////////////////

using namespace affxsnp;
using namespace std;

//////////////////////////////////////////////////////////////////////
// Definition and initial value of the static member CSnpWithin::width.
// NOTE(review): presumably a window width expressed in the same units as
// CSnpData::pos (base pairs?) -- confirm against the CSnpWithin declaration.
double CSnpWithin::width = 100000;

//////////////////////////////////////////////////////////////////////
// Default genotype: both haplotypes are NUCLEOTIDE_N and the call is unphased.
CGenotype::CGenotype() {
	phased = false;
	hap1 = hap2 = NUCLEOTIDE_N;
}

// Builds an unphased genotype from two haplotype codes.
// The codes are stored exactly as given; no validation is performed here.
CGenotype::CGenotype(char h1, char h2) {
	phased = false;
	hap2 = h2;
	hap1 = h1;
}

// Builds a genotype from two haplotype codes and an explicit phased flag.
// The codes are stored exactly as given; no validation is performed here.
CGenotype::CGenotype(char h1, char h2, bool p) {
	phased = p;
	hap2 = h2;
	hap1 = h1;
}

// Sets the first haplotype.
//   h : one of NUCLEOTIDE_A / _C / _G / _T / _N.
// Throws a C string (const char*) for any other value; hap1 is left
// unchanged in that case.
void CGenotype::SetHap1(char h) {
	switch(h) {
		case NUCLEOTIDE_A:
		case NUCLEOTIDE_C:
		case NUCLEOTIDE_G:
		case NUCLEOTIDE_T:
		case NUCLEOTIDE_N:
			hap1 = h;
			break;
		default:
			// The original had an assignment after this throw; it could
			// never execute, so it has been removed.
			throw "Invalid haplotype specified for hap1";
	}
}

// Sets the second haplotype.
//   h : one of NUCLEOTIDE_A / _C / _G / _T / _N.
// Throws a C string (const char*) for any other value; hap2 is left
// unchanged in that case.
void CGenotype::SetHap2(char h) {
	switch(h) {
		case NUCLEOTIDE_A:
		case NUCLEOTIDE_C:
		case NUCLEOTIDE_G:
		case NUCLEOTIDE_T:
		case NUCLEOTIDE_N:
			hap2 = h;
			break;
		default:
			// The original had an assignment after this throw; it could
			// never execute, so it has been removed.
			throw "Invalid haplotype specified for hap2";
	}
}

//////////////////////////////////////////////////////////////////////

double CSnpData::GetCallRate(void) const {
	std::vector<CGenotype>::const_iterator g;
	unsigned int nCall=0;
	for(g=geno.begin(); g != geno.end(); g++) {
		if((g->GetHap1() != NUCLEOTIDE_N) && (g->GetHap1() != NUCLEOTIDE_N))
			nCall++;
	}
	return((double)nCall/(double)geno.size());
}

void CSnpData::print(void) const {
	cout << setiosflags(ios::fixed);
	cout << std::setprecision(0);
	cout << "snpID\t" << snpID << "\n";
	cout << "seq\t" << seq << "\n";
	cout << "pos\t" << pos << "\n";
	cout << "allele1\t" << ((int)allele1) << "\n";
	cout << "allele2\t" << ((int)allele2) << "\n";
	cout << std::setprecision(4);
	cout << "maf\t" << maf << "\n";
	cout << "geno\t";
	cout << std::setprecision(0);
	cout << geno.size() << "\t";
	std::vector<CGenotype>::const_iterator g;
	for(g=geno.begin(); g != geno.end(); g++) {
		cout << " " << ((int)g->GetHap1()) << ((int)g->GetHap2());
	}
	cout << "\n";
}

// Default SNP: sequence name "Unknown", position 0, both alleles
// NUCLEOTIDE_N, no genotypes. snpID keeps its default-constructed value.
CSnpData::CSnpData() {
	seq = "Unknown";
	pos = 0;
	allele1 = NUCLEOTIDE_N;
	allele2 = NUCLEOTIDE_N;
	// maf is otherwise only assigned by SetAlleles(); give it a defined
	// value so print() never reads an indeterminate value.
	maf = 0;
	geno.resize(0);
}

// Builds a SNP with no genotypes.
//   i : SNP identifier
//   s : sequence name
//   p : position
// Both alleles start as NUCLEOTIDE_N.
CSnpData::CSnpData(std::string i, std::string s, double p) {
	snpID = i;
	seq = s;
	pos = p;
	allele1 = NUCLEOTIDE_N;
	allele2 = NUCLEOTIDE_N;
	// maf is otherwise only assigned by SetAlleles(); give it a defined
	// value so print() never reads an indeterminate value.
	maf = 0;
	geno.resize(0);
}

// Builds a SNP with room for n default-constructed genotypes.
//   i : SNP identifier
//   s : sequence name
//   p : position
//   n : number of genotype slots to allocate
// Both alleles start as NUCLEOTIDE_N.
CSnpData::CSnpData(std::string i, std::string s, double p, int n) {
	snpID = i;
	seq = s;
	pos = p;
	allele1 = NUCLEOTIDE_N;
	allele2 = NUCLEOTIDE_N;
	// maf is otherwise only assigned by SetAlleles(); give it a defined
	// value so print() never reads an indeterminate value.
	maf = 0;
	geno.resize(n);
}

// Sets the genotype of sample i.
//   i      : sample index into geno
//   h1, h2 : haplotype codes, validated by CGenotype::SetHap1/SetHap2
//   p      : phased flag
// Throws a C string (const char*) if i is out of range; SetHap1/SetHap2
// throw a C string for an invalid haplotype code.
void CSnpData::SetGeno(unsigned int i, char h1, char h2, bool p) {
	// i is unsigned, so only the upper bound needs checking.
	if(i >= geno.size())
		throw "Bad sample index for which to set genotype\n";

	geno[i].SetHap1(h1);
	geno[i].SetHap2(h2);
	geno[i].SetPhased(p);
}

// Scans all genotypes to determine the two alleles of this SNP and its
// minor allele frequency.
//
// Haplotypes are visited in order (hap1 then hap2 of each genotype) and
// NUCLEOTIDE_N is ignored. The first distinct value seen becomes allele1,
// the second distinct value becomes allele2, and occurrences of each are
// counted. Afterwards the alleles are swapped if necessary so that allele1
// is the more frequent one, and maf = count(allele2) / (count(allele1) +
// count(allele2)), or 0 when no allele was seen.
//
// If no called haplotype is found, allele1/allele2 keep whatever value they
// had before the call.
//
// Throws a C string (const char*) if a third distinct allele is encountered.
void CSnpData::SetAlleles() {
	int n_alleles_found = 0;
	int n_allele1=0;
	int n_allele2=0;

	std::vector<CGenotype>::const_iterator g;
	for(g = geno.begin(); g != geno.end(); ++g)
	{
		const char h1 = g->GetHap1();
		const char h2 = g->GetHap2();

		if(n_alleles_found == 0)
		{
			// No allele known yet: the first called haplotype defines allele1.
			if(h1 != NUCLEOTIDE_N)
			{
				allele1 = h1;
				n_alleles_found=1;
				n_allele1 = 1;
			}

			if(h2 != NUCLEOTIDE_N)
			{
				if(n_alleles_found == 0)
				{
					allele1 = h2;
					n_alleles_found=1;
					n_allele1 = 1;
				}
				else if(h2 != allele1)
				{
					allele2 = h2;
					n_alleles_found=2;
					n_allele2 = 1;
				}
				else
				{
					n_allele1++;
				}
			}
		}
		else if(n_alleles_found == 1)
		{
			// One allele known: any other called value defines allele2.
			if((h1 != NUCLEOTIDE_N) && (h1 != allele1))
			{
				allele2 = h1;
				n_alleles_found=2;
				n_allele2 = 1;
			}
			else if((h1 != NUCLEOTIDE_N) && (h1 == allele1))
			{
				n_allele1++;
			}

			if(n_alleles_found == 1)
			{
				if((h2 != NUCLEOTIDE_N) && (h2 != allele1))
				{
					allele2 = h2;
					n_alleles_found=2;
					n_allele2 = 1;
				}
				else if((h2 != NUCLEOTIDE_N) && (h2 == allele1))
				{
					n_allele1++;
				}
			}
			else
			{
				// allele2 was just defined by hap1 of this genotype, so
				// hap2 must now match one of the two known alleles.
				if(h2 == allele1)
				{
					n_allele1++;
				}
				else if(h2 == allele2)
				{
					n_allele2++;
				}
				else if(h2 != NUCLEOTIDE_N)
				{
					throw "inconsistent alleles found\n";
				}
			}
		}
		else // n_alleles_found == 2
		{
			// Both alleles known: just count, rejecting any third value.
			if(h1 == allele1)
			{
				n_allele1++;
			}
			else if(h1 == allele2)
			{
				n_allele2++;
			}
			else if(h1 != NUCLEOTIDE_N)
			{
				throw "Unexpected value for first allele\n";
			}

			if(h2 == allele1)
			{
				n_allele1++;
			}
			else if(h2 == allele2)
			{
				n_allele2++;
			}
			else if(h2 != NUCLEOTIDE_N)
			{
				// The original reported "first allele" here (copy/paste slip).
				throw "Unexpected value for second allele\n";
			}
		}
	}

	// Make allele1 the major allele.
	if(n_allele1 < n_allele2) {
		char temp_char = allele1;
		allele1 = allele2;
		allele2 = temp_char;
		int temp_int = n_allele1;
		n_allele1 = n_allele2;
		n_allele2 = temp_int;
	}
	maf = (n_allele1 > 0) ? ( ((double) n_allele2) / ((double) n_allele1 + n_allele2) ) : 0;
}


// Removes the SNPs at the given positions of vsnp. Always returns 0.
//   dropIndex : positions into vsnp to erase. They are processed from the
//               last element to the first, so the list must be in ascending
//               order for every index to still refer to the intended SNP.
// No bounds checking is done; every index must be valid.
int CSnpPanel::dropSNPs(std::vector<int> &dropIndex) {
	std::vector<int>::reverse_iterator drop_it;
	for(drop_it = dropIndex.rbegin(); drop_it != dropIndex.rend(); ++drop_it) {
		// Take a fresh begin() each time: erase() invalidates iterators at
		// or after the erased element, so a cached iterator is not safe.
		vsnp.erase(vsnp.begin() + (*drop_it));
	}
	return(0);
}

int CSnpPanel::highMAF(double mafThreshold, std::vector<int> &highMAF, std::vector<int> &lowMAF) {
	
	if (vsnp.size()>0){	// single marker mode
	std::vector<CSnpData>::const_iterator s;
	
	int i=0;
	int nLow=0;
	highMAF.clear();
	lowMAF.clear();
	for(s = vsnp.begin(); s != vsnp.end(); s++,i++) {
		if(s->GetMAF() < mafThreshold) {
			lowMAF.push_back(i);
			nLow++;
		} else {
			highMAF.push_back(i);
		}
	}

	return(nLow);
	}	
	else{ //multiple marker mode
	std::vector<HaplotypeData>::const_iterator s;
	
	int i=0;
	int nLow=0;
	highMAF.clear();
	lowMAF.clear();
	for(s = vhaplo.begin(); s != vhaplo.end(); s++,i++) {
		if(s->GetMAF() < mafThreshold) {
			lowMAF.push_back(i);
			nLow++;
		} else {
			highMAF.push_back(i);
		}
	}
	return(nLow);

	}
}

void CSnpData::ApplyCallRate(const double& p) 
{
	assert(p<=1.0 && p>=0.0);

	if(p >= 1-EPSILON)
		return;

	double p1 = 0;
	std::vector <CGenotype>::iterator it = geno.begin();
	for(; it != geno.end(); it ++)
	{
		// hopefully this is uniformly distributed
		p1 = double(rand())/double(RAND_MAX);
		if(p1 > p)
		{
			it->SetHap1(NUCLEOTIDE_N);
			it->SetHap2(NUCLEOTIDE_N);
		}
	}
}

void CSnpData::ApplyCallRateHet(const double& p) 
{
	assert(p<=1.0 && p>=0.0);

	if(p >= 1-EPSILON)
		return;

	double p1 = 0;
	std::vector <CGenotype>::iterator it = geno.begin();
	for(; it != geno.end(); it ++)
	{
		// hopefully this is uniformly distributed
		p1 = double(rand())/double(RAND_MAX);
		if(p1 > p && (it->GetHap1() != it->GetHap2()))
		{
			it->SetHap1(NUCLEOTIDE_N);
			it->SetHap2(NUCLEOTIDE_N);
		}
	}
}

void CSnpData::ApplyCallRateHom(const double& p) 
{
	assert(p<=1.0 && p>=0.0);

	if(p >= 1-EPSILON)
		return;

	double p1 = 0;
	std::vector <CGenotype>::iterator it = geno.begin();
	for(; it != geno.end(); it ++)
	{
		// hopefully this is uniformly distributed
		p1 = double(rand())/double(RAND_MAX);
		if(p1 > p && (it->GetHap1() == it->GetHap2()))
		{
			it->SetHap1(NUCLEOTIDE_N);
			it->SetHap2(NUCLEOTIDE_N);
		}
	}
}

// Returns a pseudo-random integer in the range 1..4, drawn with rand().
unsigned short RollDie()
{
	const int face = rand() % 4;
	return static_cast<unsigned short>(face + 1);
}

// Returns 0 or 1, taken from the parity of rand().
unsigned short FlipCoin()
{
	const int parity = rand() % 2;
	return static_cast<unsigned short>(parity);
}

// Convenience overload: applies separate error rates to equal-haplotype and
// differing-haplotype calls, with an allele bias of 0.5.
void CSnpData::ApplyErrorRate(double p_hom_err, double p_het_err) {
	const double unbiased = 0.5;
	ApplyErrorRate(p_hom_err, p_het_err, unbiased);
}

// Convenience overload: uses the same error rate p for both kinds of call,
// with an allele bias of 0.5.
void CSnpData::ApplyErrorRate(double p) {
	const double unbiased = 0.5;
	ApplyErrorRate(p, p, unbiased);
}

void CSnpData::ApplyErrorRate(double p_hom_err, double p_het_err, double allele_bias) {

	assert(p_hom_err<=1 && p_hom_err>=0);
	assert(p_het_err<=1 && p_het_err>=0);
	assert(allele_bias<=1 && allele_bias>=0);
	if((p_hom_err > 1-EPSILON) && (p_het_err > 1-EPSILON))
		return;

	unsigned short ghap1 = allele1, ghap2 = allele2;

	// did not find ghap1, create randomly
	if(ghap1 == NUCLEOTIDE_N) 
	{
		ghap1 = RollDie();
		while(ghap1 == ghap2)
			ghap1 = RollDie();
	}

	// did not find ghap2 or the two are equal, create randomly
	if(ghap2 == NUCLEOTIDE_N || ghap1 == ghap2) 
	{
		ghap2 = RollDie();
		while(ghap1 == ghap2)
			ghap2 = RollDie();
	}

	// For het errors, one allele will randomly (with probability q) be 'preferred'.
	unsigned short preferred_allele,other_allele;
	if(FlipCoin()) {
		preferred_allele = ghap1;
		other_allele = ghap2;
	} else {
		preferred_allele = ghap2;
		other_allele = ghap1;
	}

	std::vector <CGenotype>::iterator it = geno.begin();
	for(it = geno.begin(); it != geno.end(); it ++)
	{
		double p1;
		if(it->GetHap1() != it->GetHap2()) {
			p1 = double(rand())/double(RAND_MAX);
			if(p1 < p_het_err) {
				// het calls get converted to homo, in a possibly biased fashion
				double q1 = double(rand())/double(RAND_MAX);
				unsigned short hom_type = (1 - (q1 > allele_bias))*preferred_allele + (q1 > allele_bias)*other_allele;
				it->SetHap1(hom_type);
				it->SetHap2(hom_type);
			}
		} else if((it->GetHap1() != NUCLEOTIDE_N) && (it->GetHap2() != NUCLEOTIDE_N)) {
			p1 = double(rand())/double(RAND_MAX);
			if(p1 < p_hom_err) {
				// homo calls get converted to het
				it->SetHap1(ghap1);
				it->SetHap2(ghap2);
			}
		}
	}
}

//////////////////////////////////////////////////////////////////////

// Builds a sample record.
//   p : family (pedigree) ID       s : sample ID
//   f : father's sample ID         m : mother's sample ID
//   g : gender code                a : affection status code
//   i : storage index of this sample
// Note the argument order: the family ID comes before the sample ID.
CSampleData::CSampleData(std::string p, std::string s, std::string f, std::string m, char g, char a, int i) {
	familyID = p;
	sampleID = s;
	fatherID = f;
	motherID = m;

	gender = g;
	affected = a;

	storage_index = i;
}

//////////////////////////////////////////////////////////////////////
// Creates an empty panel; all members are default-constructed.
CSnpPanel::CSnpPanel() {}

// Reads sample records and genotypes from a ped file into `sample` and
// into the geno vector of every SNP already present in vsnp.
//
// The file is assumed to have no column headers; columns are specified at
// <http://www.broad.mit.edu/mpg/haploview/files.php#formats>. Each record
// is: pedigree ID, sample ID, father ID, mother ID, gender, status, then
// one pair of numeric haplotype codes per SNP in vsnp.
//
// Throws std::string if the file cannot be opened, and a C string
// (const char*) if a record has too few genotypes or no sample was read.
// Existing contents of `sample` are discarded first.
void CSnpPanel::ReadPedFile(std::string pedFile)
{
	// clear the contents if there is any
	sample.clear();

	std::ifstream fin(pedFile.c_str(), std::ios_base::in);
	if(!fin)
	{
		std::string errmsg = "Can not open ped file :"+ pedFile + ", make sure the file exist.";
		throw errmsg;
	}

	int nNumSnps = vsnp.size();

	// First pass: count the sample lines so geno can be sized up front.
	// Blank / whitespace-only lines are NOT counted; the original counted
	// them, which left surplus default genotypes in every SNP.
	int nSamples = 0;
	std::string line;
	while(std::getline(fin, line, '\n'))
	{
		if(line.find_first_not_of(" \t\r\v\f") != std::string::npos)
			nSamples ++;
	}
	fin.close();

	std::string strPedId,strSampleId,strFatherId,strMotherId;
	char gender,status;
	unsigned short hap1, hap2;

	// Second pass: re-open the file and parse the records token by token
	// (formatted extraction skips blank lines by itself).
	std::ifstream fin2(pedFile.c_str(), std::ios_base::in);
	for(int s = 0; s < nSamples; s++)
	{
		// Once an extraction fails the stream stays failed, so there is
		// nothing more to read.
		if(!(fin2 >> strPedId >> strSampleId >> strFatherId >> strMotherId >> gender >> status))
			break;

		// store the sample info, keyed by (pedigree ID, sample ID)
		CSampleData sam(strPedId,strSampleId,strFatherId,strMotherId,gender,status,s);
		std::pair<std::string,std::string> pstr;
		pstr.first = strPedId;
		pstr.second = strSampleId;
		sample.insert(std::map <std::pair<std::string,std::string>, CSampleData>::value_type(pstr,sam));

		// scan genotype info for all snps and store
		for(int i = 0; i < nNumSnps; i ++)
		{
			// set the buffer size for each snp once
			if(s == 0) vsnp[i].geno.resize(nSamples);

			// scan the genotypes in -- always paired for the two alleles
			if(!(fin2 >> hap1 >> hap2))
				throw "Ped file and info file do not match";

			vsnp[i].geno[s].hap1 = hap1;
			vsnp[i].geno[s].hap2 = hap2;
		}
	}

	fin2.close();
	if(sample.size() == 0) throw "Invalid ped file format!";
}

// Replaces vsnp with the SNPs listed in an info file.
//
// The file is assumed to have no column headers; columns are specified at
// <http://www.broad.mit.edu/mpg/haploview/files.php#formats>. Each record
// is a SNP ID followed by a position. Every SNP read gets seq = seqName.
// Reading stops at the first record that cannot be parsed.
//
// Throws std::string if the file cannot be opened and a C string
// (const char*) if no SNP was read.
void CSnpPanel::ReadInfoFile(std::string infoFile, std::string seqName)
{
	// start from an empty panel
	vsnp.clear();

	std::ifstream infoIn(infoFile.c_str(), std::ios_base::in);
	if(!infoIn)
		throw "Can not open info file :"+ infoFile + ", make sure the file exist.";

	for(;;)
	{
		CSnpData snp;
		if(!(infoIn >> snp.snpID >> snp.pos))
			break; // end of file or unparsable record
		snp.seq = seqName;
		vsnp.push_back(snp);
	}
	infoIn.close();

	if(vsnp.empty())  throw "Invalid info file format!";
}

void CSnpPanel::Read(std::string pedFile, std::string infoFile) {
  Read(pedFile,infoFile,DEFAULT_SEQ_NAME);
}

void CSnpPanel::Read(std::string pedFile, std::string infoFile, std::string seqName) {
	// See <http://www.broad.mit.edu/mpg/haploview/files.php#formats> for details of the .ped and .info files to be parsed.

	// read info file first so that we know how many snps we are handling
	ReadInfoFile(infoFile,seqName);

	// read the ped file second
	ReadPedFile(pedFile);

	// determine the alleles for each SNP
	std::vector<CSnpData>::iterator s;
	for(s = vsnp.begin(); s != vsnp.end(); s++)
		s->SetAlleles();

	// sort the info: seq then pos then snpid
	std::sort(vsnp.begin(), vsnp.end(), CSnpLess());

	return;
}

// Appends the SNPs of another ped/info file pair to the panel.
// Returns 0 on success, otherwise the non-zero status reported by AddSNPs
// or AddPedInfo. If AddSNPs added no SNP, its status is returned as is and
// nothing else is done.
long CSnpPanel::Add(std::string pedFile, std::string infoFile, std::string seqName)
{
	// Remember where the newly added SNPs will start.
	const int nNumberSNPs = GetNumberSnps();

	// Info file first, so that we know how many SNPs are being added.
	int nMoreSnps = 0;
	long lRet = AddSNPs(pedFile, infoFile, nMoreSnps, seqName);
	if(nMoreSnps == 0)
		return lRet;
	if(lRet)
		return lRet;

	// Then the genotypes for those SNPs.
	lRet = AddPedInfo(pedFile, nMoreSnps);
	if(lRet)
		return lRet;

	// Determine the alleles of each added SNP.
	for(unsigned int s = nNumberSNPs; s < vsnp.size(); s++)
		vsnp[s].SetAlleles();

	// Re-sort everything (seq, then pos, then snpid).
	// This may not be efficient, but stay with it for now.
	std::sort(vsnp.begin(), vsnp.end(), CSnpLess());

	return lRet;
}

void CSnpPanel::SortTest() {
	std::vector <CSnpData> tempSnp(2);
	for(unsigned int s = 1; s < vsnp.size(); s++) {
		tempSnp[0] = vsnp[s-1];
		tempSnp[1] = vsnp[s];
		std::sort(tempSnp.begin(),tempSnp.end(),CSnpLess());
		if(!(tempSnp[0] == vsnp[s-1])) {
			cout << "disorder found between snps " << s-1 << " and " << s << ":\n";
			vsnp[s-1].print();
			vsnp[s].print();
			exit(EXIT_FAILURE);
		}
	}
}

// Checks that every sample of pedFile is already known to the panel and
// then appends the SNPs listed in infoFile to vsnp.
//   nMoreSnps : output, number of SNPs appended (only written once the
//               sample check has passed)
//   seqName   : sequence name assigned to every appended SNP
// Returns 0 on success, 1 if the ped file cannot be opened, 2 if the ped
// file contains a sample that is not in `sample`.
// Throws std::string if the info file cannot be opened and a C string
// (const char*) if vsnp is still empty afterwards.
//
// Both files are assumed to have no column headers; columns are specified
// at <http://www.broad.mit.edu/mpg/haploview/files.php#formats>.
long CSnpPanel::AddSNPs(std::string pedFile, std::string infoFile, int& nMoreSnps, std::string seqName)
{
// TODO:
// The sample consistency check only checks that the expected samples are found in the ped file, but
// it doesn't check that they occur in the same order.  Hopefully a sane user will ensure that samples
// are specified in the same order in the ped files, but really we should check it and throw an exception
// if it isn't the case.

	// --- sample consistency check, before any SNP is added ---
	std::ifstream pedIn(pedFile.c_str(), std::ios_base::in);
	if(!pedIn)
		return 1;

	int nSamples = 0;
	std::string restOfLine;
	std::string strPedId,strSampleId,strFatherId,strMotherId;
	char gender,status;
	// Read the six header fields of a record, then discard its genotypes.
	while((pedIn >> strPedId >> strSampleId >> strFatherId >> strMotherId >> gender >> status)
	      && std::getline(pedIn, restOfLine, '\n'))
	{
		// sample not available in the existing sample list
		if(sample.find(PairedString(strPedId,strSampleId)) == sample.end())
		{
			pedIn.close();
			return 2;
		}
		nSamples ++;
	}
	pedIn.close();

	assert((unsigned int)nSamples == sample.size());

	// --- append the SNPs of the info file ---
	std::ifstream infoIn(infoFile.c_str(), std::ios_base::in);
	if(!infoIn)
		throw "Can not open info file :"+ infoFile + ", make sure the file exist.";

	nMoreSnps = 0;
	for(;;)
	{
		CSnpData snp;
		if(!(infoIn >> snp.snpID >> snp.pos))
			break; // end of file or unparsable record
		snp.seq = seqName;
// TODO:
// A duplicate check used to live here, but as implemented (using std::find) it was
// killing performance - linear scan for every added SNP.  We should replace this by
// something more efficient - for example, add to CSnpPanel a map keyed by (seq,pos,snpID)
// and with boolean values indicating if a SNP is already in the panel or not.  For now
// the check is commented out as the files we are using should not contain duplicate
// SNP entries.
		//if(std::find(vsnp.begin(),vsnp.end(),snp) != vsnp.end())
		//	continue;
		vsnp.push_back(snp);
		nMoreSnps ++;
	}
	infoIn.close();

	if(vsnp.empty())  throw "Invalid info file format!";

	return 0;
}

long CSnpPanel::AddPedInfo(std::string pedFile, const int& nMoreSnps)
{
	long lRet = 0;

	std::string strPedId,strSampleId,strFatherId,strMotherId;
	char gender,status;
	unsigned short hap1, hap2;
	int nSamples = sample.size();
	int nNumSnps = vsnp.size();

	std::ifstream fin(pedFile.c_str(), std::ios_base::in);
	int start = nNumSnps - nMoreSnps;
	for(int s = 0; s < nSamples; s++)
	{
		if(!(fin >> strPedId >> strSampleId >> strFatherId >> strMotherId >> gender >> status)) 
			continue; // skip blank lines

		if(fin.fail()) throw "Error in reading ped file";

		// scan genotype info for all snps and store 
		for(int i = start; i < nNumSnps; i ++)
		{
			// set the buffer size for each snp once
			if(s == 0) vsnp[i].geno.resize(nSamples);

			// scan the genotypes in -- always paired for the two alleles
			if(!(fin >> hap1 >> hap2))
				throw "Ped file and info file do not match";

			vsnp[i].geno[s].hap1 = hap1;
			vsnp[i].geno[s].hap2 = hap2;
		}
	}

	fin.close();

	return lRet;
}

// Uses a child's genotype to order (phase) the two haplotypes of each
// parent at one marker.
//   child1/child2, father1/father2, mother1/mother2 : the two haplotype
//       codes of each family member
//   trans : output. For each parent, Tb/Ub receive the parent's two
//       haplotypes in the resolved order and the *Phased flag tells whether
//       that order could be determined.
// NOTE(review): Tb/Ub presumably stand for "transmitted base" and
// "untransmitted base" -- confirm against the TRANSITION declaration.
// The code does not check that the child's genotype is compatible with the
// parents' genotypes.
void CSnpPanel::PhasingFamily(const char child1,const char child2,const char father1,const char father2,const char mother1,const char mother2,TRANSITION& trans)
{
	// In what follows there is an underlying assumption that if one haplotype is N the
	// other is too, so to be safe we assert it up-front:
	if (child1 == NUCLEOTIDE_N || child2 == NUCLEOTIDE_N)
		assert(child1 == NUCLEOTIDE_N && child2 == NUCLEOTIDE_N);
	if (father1 == NUCLEOTIDE_N || father2 == NUCLEOTIDE_N)
		assert(father1 == NUCLEOTIDE_N && father2 == NUCLEOTIDE_N);
	if (mother1 == NUCLEOTIDE_N || mother2 == NUCLEOTIDE_N)
		assert(mother1 == NUCLEOTIDE_N && mother2 == NUCLEOTIDE_N);

	// Start from "unphased"; the branches below set the flags to true
	// whenever a parent's order is resolved.
	trans.fatherPhased = false;
	trans.motherPhased = false;
	if (child1 == NUCLEOTIDE_N || child2 == NUCLEOTIDE_N) {
		// child missing: parents are copied through unchanged; a parent
		// whose two haplotypes are equal is flagged as phased
		trans.fatherTb = father1;
		trans.fatherUb = father2;
		if(father1 == father2)
			trans.fatherPhased = true;
		trans.motherTb = mother1;
		trans.motherUb = mother2;
		if(mother1 == mother2)
			trans.motherPhased = true;
	} else if (child1 == child2) {
		// child homozygous
		if (father1 == NUCLEOTIDE_N) {
			// father missing: his Tb is taken from the child, Ub stays N
			trans.fatherTb = child1;
			trans.fatherUb = NUCLEOTIDE_N;
		} else if (father1 == child1) {
			trans.fatherTb = father1;
			trans.fatherUb = father2;
		} else {
			// father1 differs from the child's allele, so father2 is put first
			trans.fatherTb = father2;
			trans.fatherUb = father1;
		}
		trans.fatherPhased = true;
		
		if (mother1 == NUCLEOTIDE_N) {
			// mother missing: her Tb is taken from the child, Ub stays N
			trans.motherTb = child1;
			trans.motherUb = NUCLEOTIDE_N;
		} else if (mother1 == child1) {
			trans.motherTb = mother1;
			trans.motherUb = mother2;
		} else {
			// mother1 differs from the child's allele, so mother2 is put first
			trans.motherTb = mother2;
			trans.motherUb = mother1;
		}
		trans.motherPhased = true;
	} else {
		// child heterozygous
		if (father1 == NUCLEOTIDE_N && mother1 == NUCLEOTIDE_N) {
			//both missing
			trans.fatherTb = NUCLEOTIDE_N;
			trans.fatherUb = NUCLEOTIDE_N;
			trans.motherTb = NUCLEOTIDE_N;
			trans.motherUb = NUCLEOTIDE_N;
		} else if (father1 == NUCLEOTIDE_N && mother1 != mother2) {
			//father missing mother het
			trans.fatherTb = NUCLEOTIDE_N;
			trans.fatherUb = NUCLEOTIDE_N;
			trans.motherTb = mother1;
			trans.motherUb = mother2;
		} else if (mother1 == NUCLEOTIDE_N && father1 != father2) {
			//father het mother missing
			trans.fatherTb = father1;
			trans.fatherUb = father2;
			trans.motherTb = NUCLEOTIDE_N;
			trans.motherUb = NUCLEOTIDE_N;
		} else if (father1 == NUCLEOTIDE_N && mother1 == mother2) {
			//father missing mother hom
			trans.motherTb = mother1;
			trans.motherUb = mother1;
			trans.motherPhased = true;
			// the child's allele that is not the mother's is assigned to the father
			trans.fatherTb = (child1 == mother1) ? child2 : child1;
			trans.fatherUb = NUCLEOTIDE_N;
			trans.fatherPhased = true;
		} else if (mother1 == NUCLEOTIDE_N && father1 == father2) {
			//mother missing father hom
			trans.fatherTb = father1;
			trans.fatherUb = father1;
			trans.fatherPhased = true;
			// the child's allele that is not the father's is assigned to the mother
			trans.motherTb = (child1 == father1) ? child2 : child1;
			trans.motherUb = NUCLEOTIDE_N;
			trans.motherPhased = true;
		} else if (father1 == father2 && mother1 != mother2) {
			//father hom mother het
			trans.fatherTb = father1;
			trans.fatherUb = father2;
			trans.fatherPhased = true;
			// the mother's Tb is the child's allele that differs from the father's
			if (child1 == father1) {
				trans.motherTb = child2;
				trans.motherUb = child1;
			} else {
				trans.motherTb = child1;
				trans.motherUb = child2;
			}
			trans.motherPhased = true;
		} else if (mother1 == mother2 && father1 != father2) {
			//father het mother hom
			trans.motherTb = mother1;
			trans.motherUb = mother2;
			trans.motherPhased = true;
			// the father's Tb is the child's allele that differs from the mother's
			if (child1 == mother1) {
				trans.fatherTb = child2;
				trans.fatherUb = child1;
			} else {
				trans.fatherTb = child1;
				trans.fatherUb = child2;
			}
			trans.fatherPhased = true;
		} else if (father1 == father2 && mother1 == mother2) {
			//mother & father hom
			trans.fatherTb = father1;
			trans.fatherUb = father1;
			trans.fatherPhased = true;
			trans.motherTb = mother1;
			trans.motherUb = mother1;
			trans.motherPhased = true;
		} else {
			//everybody het: order cannot be resolved, both flags stay false
			trans.fatherTb = father1;
			trans.fatherUb = father2;
			trans.motherTb = mother1;
			trans.motherUb = mother2;
		}
	}

	return;
}

// Returns a copy of the sample whose storage index equals i.
// Linear scan over `sample`; throws a C string (const char*) if no sample
// has that index.
CSampleData CSnpPanel::GetSampleByIndex(int i) {
	mapSampleIt sit = sample.begin();
	while(sit != sample.end()) {
		if(sit->second.GetStorageIndex() == i)
			return sit->second;
		++sit;
	}
	throw "Unable to find sample of given index\n";
}

// Phases the parents' genotypes of one SNP using every child in the panel.
// For each sample that has BOTH parents in `sample`, PhasingFamily() is run
// on the trio and the parents' genotypes in snp.geno are overwritten with
// the result. Samples with a missing parent are skipped.
void CSnpPanel::PhaseTrios(CSnpData& snp)
{
	for(std::map <PairedString, CSampleData>::iterator sit = sample.begin(); sit != sample.end(); ++sit)
	{
		// Storage indices of father (first) and mother (second); -1 = absent.
		const ParentIndex pidx = GetParentIndex(GetParent(sit));

		// let's assume either both parents exist or neither for now
		// will change this later
		if(pidx.first == -1 || pidx.second == -1)
			continue;

		CGenotype& kid = snp.geno[sit->second.GetStorageIndex()];
		CGenotype& dad = snp.geno[pidx.first];
		CGenotype& mom = snp.geno[pidx.second];

		// All six haplotypes are passed by value, so they are read before
		// any parent genotype is modified below.
		TRANSITION trans;
		PhasingFamily(kid.hap1, kid.hap2, dad.hap1, dad.hap2, mom.hap1, mom.hap2, trans);

		dad.SetHap1(trans.fatherTb);
		dad.SetHap2(trans.fatherUb);
		dad.SetPhased((trans.fatherPhased) != 0);

		mom.SetHap1(trans.motherTb);
		mom.SetHap2(trans.motherUb);
		mom.SetPhased((trans.motherPhased) != 0);
	}
}

// Iterator flavour of PhaseTrios(CSnpData&): phases the parents' genotypes
// of the SNP that `it` points to. `it` must be dereferenceable.
void CSnpPanel::PhaseTrios(vSnpItNC& it)
{
	// Same per-trio processing as the reference overload, applied to *it.
	CSnpData& snp = *it;
	PhaseTrios(snp);
}

void CSnpPanel::PhaseTrios()
{
	char father1,father2,mother1,mother2,child1,child2;

	for(mapSampleIt sit = sample.begin(); sit != sample.end(); sit ++)
	{
		// Get parents for this sample if there are any
		ParentIndex pidx(-1,-1);
		Parent parent = GetParent(sit);
		pidx = GetParentIndex(parent);

		// let's assume either both parents exist or neighter for now
		// will change this later
		if(pidx.first == -1 || pidx.second == -1) // no parents
			continue;

		std::vector<CSnpData>::iterator it = vsnp.begin();
		for(; it != vsnp.end(); it ++)
		{
			child1= it->geno[sit->second.GetStorageIndex()].hap1;
			child2= it->geno[sit->second.GetStorageIndex()].hap2;
			father1 = it->geno[pidx.first].hap1;
			father2 = it->geno[pidx.first].hap2;
			mother1 = it->geno[pidx.second].hap1;
			mother2 = it->geno[pidx.second].hap2;

			TRANSITION trans;
			PhasingFamily(child1,child2,father1,father2,mother1,mother2,trans);

			// Father
			it->geno[pidx.first].SetHap1(trans.fatherTb);
			it->geno[pidx.first].SetHap2(trans.fatherUb);
			it->geno[pidx.first].SetPhased(trans.fatherPhased);

			// Mother
			it->geno[pidx.second].SetHap1(trans.motherTb);
			it->geno[pidx.second].SetHap2(trans.motherUb);
			it->geno[pidx.second].SetPhased(trans.motherPhased);
		}
	}
}


void CSnpPanel::GetUnrelatedSampleIndex(std::vector< std::pair<unsigned int, std::string> >& u) {
	int i=0;
	for(mapSampleIt sit = sample.begin(); sit != sample.end(); sit++,i++) {
		// Get parents for this sample if there are any
		ParentIndex pidx(-1,-1);
		Parent parent = GetParent(sit);
		pidx = GetParentIndex(parent);

		// Add sample index to result if it has no parents in the panel
		if(pidx.first == -1 && pidx.second == -1) {
			std::pair <unsigned int, std::string> p(sit->second.GetStorageIndex(),sit->second.GetSampleID());
			u.push_back(p);
		}
	}

	return;
}


// Convenience overload: symmetric window of windowwidth on both sides of
// ref, non-verbose.
bool CSnpPanel::GetSNPsNearTo(const CSnpData& ref, const double& windowwidth, PairedSnpIt& pit)
{
	const bool verbose = false;
	return GetSNPsNearTo(ref, windowwidth, windowwidth, pit, verbose);
}

// Convenience overload: asymmetric window around ref, non-verbose.
bool CSnpPanel::GetSNPsNearTo(const CSnpData& ref, const double& leftWindowSize, const double& rightWindowSize, PairedSnpIt& pit)
{
	const bool verbose = false;
	return GetSNPsNearTo(ref, leftWindowSize, rightWindowSize, pit, verbose);
}

// Finds the range of SNPs in vsnp lying in a window around ref.
//   ref             : reference SNP; copies with pos shifted by the window
//                     sizes are used as search keys
//   leftWindowSize  : subtracted from ref.pos for the lower key
//   rightWindowSize : added to ref.pos for the upper key
//   pit             : output; pit.first = lower_bound of the lower key,
//                     pit.second = upper_bound of the upper key, both
//                     using CSnpLess
//   verbose         : when true, prints the search keys and bounds to stdout
// Returns false if pit.first is vsnp.end() or pit.second is vsnp.begin(),
// true otherwise. vsnp must already be sorted with CSnpLess.
bool CSnpPanel::GetSNPsNearTo(const CSnpData& ref, const double& leftWindowSize, const double& rightWindowSize, PairedSnpIt& pit, bool verbose)
{
	CSnpData lower = ref;
	lower.pos -= leftWindowSize;
	CSnpData upper = ref;
	upper.pos += rightWindowSize;

	if(verbose) {
		cout << "Searching based on " << ref.GetSnpID() << " (pos = " << ref.pos << "):\n";
		ref.print();
	}

	pit.first = std::lower_bound(vsnp.begin(),vsnp.end(),lower,CSnpLess());
	if(verbose) {
		cout << "Lower bound for " << ref.GetSnpID() << " (lower.pos = " << lower.pos << "):\n";
		// Never dereference end(): the original printed first and checked after.
		if(pit.first == vsnp.end())
			cout << "Have reached the end.\n";
		else
			pit.first->print();
	}

	pit.second = std::upper_bound(vsnp.begin(),vsnp.end(),upper,CSnpLess());
	if(verbose) {
		cout << "Upper bound for " << ref.GetSnpID() << " (upper.pos = " << upper.pos << "):\n";
		// upper_bound returns end() whenever the window reaches the last SNP.
		if(pit.second == vsnp.end())
			cout << "Have reached the end.\n";
		else
			pit.second->print();
		if(pit.second == vsnp.begin())
			cout << "Have reached the beginning.\n";
	}

	return !((pit.first == vsnp.end()) || (pit.second == vsnp.begin()));
}

// Placeholder: returns a default-initialized Family.
// strChildID is currently ignored and the panel is not consulted.
Family CSnpPanel::GetFamily(const std::string strChildID) const
{
	Family family;

	return family;
}

// Looks up the parents of a sample.
//   child : iterator into `sample`, or sample.end()
// Returns a pair of iterators (first = father, second = mother). A parent
// is searched under the child's own family ID plus the child's
// father/mother ID; a parent that is not found -- or both, when child is
// sample.end() -- is reported as sample.end().
Parent CSnpPanel::GetParent(const mapSampleIt child) const
{
	Parent parent;
	parent.first = sample.end();
	parent.second = sample.end();

	if(child == sample.end())
		return parent;

	// Parents share the child's family ID (first half of the map key).
	const PairedString fatherKey(child->first.first, child->second.GetFatherID());
	const PairedString motherKey(child->first.first, child->second.GetMotherID());

	parent.first = sample.find(fatherKey);
	parent.second = sample.find(motherKey);
	return parent;
}

// Converts a pair of parent iterators into storage indices.
// Returns (father index, mother index); a parent given as sample.end() is
// reported as -1.
ParentIndex CSnpPanel::GetParentIndex(const Parent parent) const
{
	ParentIndex result(-1,-1);

	const bool haveFather = (parent.first != sample.end());
	const bool haveMother = (parent.second != sample.end());

	if(haveFather)
		result.first = parent.first->second.GetStorageIndex();
	if(haveMother)
		result.second = parent.second->second.GetStorageIndex();

	return result;
}

// Returns the storage indices (father, mother) of the parents of `child`;
// a parent that is not in the panel is reported as -1.
ParentIndex CSnpPanel::GetParentIndex(const mapSampleIt child) const
{
	// Resolve the parents first, then map them to indices with the
	// Parent-based overload, which applies the same -1 convention.
	const Parent parent = GetParent(child);
	return GetParentIndex(parent);
}

// NOT IMPLEMENTED: placeholder that ignores `it` and always returns
// vsnp.begin(). Callers must not rely on the result.
std::vector<CSnpData>::const_iterator CSnpPanel::GetLastSnpBefore(std::vector<CSnpData>::const_iterator it) {
	// TO BE WRITTEN
	// Should be a straightforward use of STL binary search
	return vsnp.begin();
}

// NOT IMPLEMENTED: placeholder that ignores `it` and always returns
// vsnp.begin(). Callers must not rely on the result.
std::vector<CSnpData>::const_iterator CSnpPanel::GetFirstSnpAfter(std::vector<CSnpData>::const_iterator it) {
	// TO BE WRITTEN
	// Should be a straightforward use of STL binary search
	return vsnp.begin();
}

//////////////////////////////////////////////////////////////////////


// Convenience overload: runs CalculateRS over all samples of m1, i.e.
// indices 0 .. m1.GetNsample()-1, each labelled "null".
double affxsnp::CalculateRS(const CSnpData& m1, const CSnpData& m2) {
	std::vector<std::pair<unsigned int, std::string> > everySample;

	const unsigned int total = m1.GetNsample();
	for(unsigned int idx = 0; idx != total; ++idx)
		everySample.push_back(std::pair<unsigned int, std::string>(idx, "null"));

	return CalculateRS(m1, m2, everySample);
}


// Convenience overload: CalculateRS restricted to the samples listed in
// sampleIndex, non-verbose.
double affxsnp::CalculateRS(const CSnpData& m1, const CSnpData& m2, std::vector<std::pair<unsigned int, std::string> >& sampleIndex)
{
	const bool verbose = false;
	return CalculateRS(m1, m2, sampleIndex, verbose);
}

double affxsnp::CalculateRS(const CSnpData& m1, const CSnpData& m2, std::vector<std::pair<unsigned int, std::string> >& sampleIndex, bool verbose)
{
	if(verbose) {
		m1.print();
		m2.print();
	}

	int m1_A = (int) m1.GetAllele1();
	int m1_B = (int) m1.GetAllele2();
	int m2_A = (int) m2.GetAllele1();
	int m2_B = (int) m2.GetAllele2();

	/* Read data to determine how many of each haplotype we have */
	std::vector<CGenotype> geno1 = m1.GetGeno();
	std::vector<CGenotype> geno2 = m2.GetGeno();
	int nAA=0;
	int nAB=0;
	int nBA=0;
	int nBB=0;
	int nDH=0; // DH = DoubleHet
	int nN = 0;
	for(std::vector<std::pair<unsigned int, std::string> >::const_iterator sample_it = sampleIndex.begin(); sample_it != sampleIndex.end(); sample_it++) {
		unsigned int i = sample_it->first;
		int h11 = (int) geno1[i].GetHap1();
		int h12 = (int) geno1[i].GetHap2();
		bool p1 = geno1[i].isPhased();
		int h21 = (int) geno2[i].GetHap1();
		int h22 = (int) geno2[i].GetHap2();
		bool p2 = geno2[i].isPhased();
		if((h11 == NUCLEOTIDE_N) || (h12 == NUCLEOTIDE_N) || (h21 == NUCLEOTIDE_N) || (h22 == NUCLEOTIDE_N)) {
			// There is at least one N
			// HaploView just ignores it, though it seems one can try extract more info.  However for consistency with Haploview we'll do the same thing...
			nN+=2;
			//if((h11 == NUCLEOTIDE_N && h12 == NUCLEOTIDE_N) || (h21 == NUCLEOTIDE_N && h22 == NUCLEOTIDE_N) || !(p1 && p2)) {
			//	// Cannot determine a haplotype at all
			//	nN += 2;
			//} else {
			//	// Have at least one non-N at each marker, and markers are phased
			//	int m1,m2;
			//	bool haveHap;
			//	if(h11 != NUCLEOTIDE_N && h21 != NUCLEOTIDE_N) {
			//		m1 = h11;
			//		m2 = h21;
			//		haveHap=true;
			//	} else if(h12 != NUCLEOTIDE_N && h22 != NUCLEOTIDE_N) {
			//		m1 = h12;
			//		m2 = h22;
			//		haveHap=true;
			//	} else {
			//		haveHap=false;
			//	}
			//	if(haveHap) {
			//		if(m1 == m1_A) {
			//			if(m2 == m2_A)
			//				nAA++;
			//			else
			//				nAB++;
			//		} else {
			//			if(m2 == m2_A)
			//				nBA++;
			//			else
			//				nBB++;
			//		}
			//		nN++;
			//	} else {
			//		nN += 2;
			//	}
			//}
		} else if ((h11 != h12) && (h21 != h22)) {
			if(p1 && p2) {
				if((h11 == m1_A && h21 == m2_A) || (h11 == m1_B && h21 == m2_B)) {
					nAA++;
					nBB++;
				} else {
					nAB++;
					nBA++;
				}
			} else {
				nDH++;
			}
		} else {
			// If we reached this point we know there are no N's and we don't have a double het
			if((h11 == m1_A) && (h12 == m1_A)) {
				// m1 is AA
				if(h21 == h22) {
					// m2 is homozygous
					if(h21 == m2_A) {
						nAA += 2;
					} else if(h21 == m2_B) {
						nAB += 2;
					} else {
						// This shouldn't be possible
						throw "logic error in determining haplotypes\n";
					}
				} else {
					// m2 is het
					nAA++;
					nAB++;
				}
			} else if((h11 == m1_B) && (h12 == m1_B)) {
				// m1 is BB
				if(h21 == h22) {
					// m2 is homozygous
					if(h21 == m2_A) {
						nBA += 2;
					} else if(h21 == m2_B) {
						nBB += 2;
					} else {
						throw "logic error in determining haplotypes\n";
					}
				} else {
					// m2 is het
					nBA++;
					nBB++;
				}
			} else {
				// m1 is het so m2 must be homozygous
				if(h21 == m2_A) {
					nAA++;
					nBA++;
				} else if(h21 == m2_B) {
					nAB++;
					nBB++;
				} else {
					throw "logic error in determining haplotypes\n";
				}
			}
		}
		if(verbose)
			cout << sample_it->second << "\t(" << h11 << "," << h12 << ")\t(" << h21 << "," << h22 <<")\t" << ((int)p1) << " " << ((int)p2) << "\tSums:\t" << nAA << ", " << nAB << ", " << nBA << ", " << nBB << ", " << nDH << "\n";
	}
	double nChrom = nAA + nAB + nBA + nBB + 2*nDH;
	assert((int)(2*sampleIndex.size()-nN) == (int) nChrom);
	if(verbose)
		cout << nAA << "\t" << nAB << "\t" << nBA << "\t" << nBB << "\t" << nDH << "\t" << nN << "\n";

	/* Deal with case where either marker is monomorphic */
	bool m1_monoMorphic = (nAA+nAB == 0) || (nBA+nBB == 0);
	bool m2_monoMorphic = (nAA+nBA == 0) || (nAB+nBB == 0);
	if((m1_monoMorphic || m2_monoMorphic) && (nDH==0)) {
		return(0);
	}

	/* If we have missing data (i.e. un-phased double hets) then use EM algorithm. */
	double pAA;
	double pAB;
	double pBA;
	double pBB;
	if(nDH>0) {
		/* Set initial probs */
		double divisor = (4.0*R2_EM_INITIALIZATION_FUDGE_FACTOR) + (double) nChrom;
		pAA=(nAA + R2_EM_INITIALIZATION_FUDGE_FACTOR) / divisor;
		pAB=(nAB + R2_EM_INITIALIZATION_FUDGE_FACTOR) / divisor;
		pBA=(nBA + R2_EM_INITIALIZATION_FUDGE_FACTOR) / divisor;
		pBB=(nBB + R2_EM_INITIALIZATION_FUDGE_FACTOR) / divisor;

		double nDH_AA_BB; /* number of double hets which are AA + BB */
		double nDH_AB_BA; /* number of double hets which are AB + BA */

		double oldLogLik=-1e10;
		for(int i=0; i<R2_EM_MAX_ITERATIONS; i++) {
			/* E-step */
			double pAA_BB = pAA * pBB;
			double pAB_BA = pAB * pBA;
			nDH_AA_BB = pAA_BB/(pAA_BB + pAB_BA) * (double) nDH;
			nDH_AB_BA = ((double) nDH) - nDH_AA_BB;

			/* M-step */
			pAA = (((double)nAA) + nDH_AA_BB) / nChrom;
			pAB = (((double)nAB) + nDH_AB_BA) / nChrom;
			pBA = (((double)nBA) + nDH_AB_BA) / nChrom;
			pBB = (((double)nBB) + nDH_AA_BB) / nChrom;

			/* Iteration complete, check if we can terminate and verify that likelihood has increased */
			double logLik = ((double)nAA) * slog(pAA) + ((double)nAB) * slog(pAB) + ((double)nBA) * slog(pBA) + ((double)nBB) * slog(pBB) + ((double)nDH) * slog(pAA*pBB + pAB*pBA);
			if(i > 0) {
				//assert(logLik > oldLogLik-EPSILON); /* total likelihood should be non-decreasing, else we have a bug */
				if(logLik-oldLogLik < R2_EM_LIKELIHOOD_CONVERSION_TOLERANCE)
					break;
				
			}
			oldLogLik = logLik;
			if(verbose)
				cout << "EM:\t" << (i+1) << "\t" << pAA << "\t" << pAB << "\t" << pBA << "\t" << pBB << "\t" << logLik << "\n";
		}
	} else {
		pAA = ((double)nAA) / nChrom;
		pAB = ((double)nAB) / nChrom;
		pBA = ((double)nBA) / nChrom;
		pBB = ((double)nBB) / nChrom;
	}

	double p_Ax = pAA + pAB;
	double p_xA = pAA + pBA;
	double p_Bx = pBA + pBB;
	double p_xB = pAB + pBB;

	double D = pAA - p_Ax * p_xA;
	double r2 = D*D / (p_Ax * p_xA * p_Bx * p_xB);

	return r2;
}


// Print-only variant: computes r^2 for every SNP pair within the window and writes
// each pair to cout. Nothing is stored; the local vector only satisfies the signature
// of the full overload.
void affxsnp::AllPairwiseLDBetweenPanels(
	CSnpPanel *refPanel,
	CSnpPanel *testPanel,
	const double& windowwidth,
	const bool skipSelf,
	const bool freePass
) {
	const bool storeResult = false;
	const bool printResult = true;
	std::vector<CSnpPair> unusedPairs;
	AllPairwiseLDBetweenPanels(refPanel, testPanel, windowwidth, skipSelf, freePass,
		unusedPairs, storeResult, printResult);
}

// Store-only variant: computes r^2 for every SNP pair within the window and appends
// each pair to `result`. Nothing is printed.
void affxsnp::AllPairwiseLDBetweenPanels(
	CSnpPanel *refPanel,
	CSnpPanel *testPanel,
	const double& windowwidth,
	const bool skipSelf,
	const bool freePass,
	std::vector<CSnpPair>& result)
{
	const bool storeResult = true;
	const bool printResult = false;
	AllPairwiseLDBetweenPanels(refPanel, testPanel, windowwidth, skipSelf, freePass,
		result, storeResult, printResult);
}

// Compute r^2 for every (reference SNP, test SNP) pair that lies within the window.
//
//   refPanel, testPanel  panels to compare; they may be the same object.
//   windowwidth          window size passed to CSnpWithin and to the neighbour search.
//   skipSelf             skip pairs that sit on the same sequence at the same position.
//   freePass             for such same-position pairs (when not skipped) report r^2 = 1
//                        without computing it.
//   result               receives one CSnpPair per evaluated pair when storeResult is set.
//   storeResult          append pairs to `result`.
//   printResult          write "id1 <tab> pos1 <tab> id2 <tab> pos2 <tab> r2" lines to cout.
//
// r^2 is computed over the samples returned by refPanel->GetUnrelatedSampleIndex().
void affxsnp::AllPairwiseLDBetweenPanels(
	CSnpPanel *refPanel,
	CSnpPanel *testPanel,
	const double& windowwidth,
	const bool skipSelf,
	const bool freePass,
	std::vector<CSnpPair>& result,
	const bool storeResult,
	const bool printResult) 
{
	// When a panel is compared with itself only the right-hand side of each SNP is searched
	// (presumably so that a pair is not visited from both of its ends); otherwise the
	// window extends to both sides.
	double leftWindowSize = (refPanel == testPanel) ? 0 : windowwidth;
	double rightWindowSize = windowwidth;

	// set the window width in which to compute r^2
	CSnpWithin::SetWindowWidth(windowwidth);

	// Identify indices of unrelated samples (more precisely, samples for which neither parent is included in the panel)
	std::vector <std::pair <unsigned int, std::string> > unrelated;
	refPanel->GetUnrelatedSampleIndex(unrelated);

	for(vSnpIt refit = refPanel->vsnp.begin(); refit != refPanel->vsnp.end(); ++refit)
	{
		PairedSnpIt pit;
		if(!testPanel->GetSNPsNearTo(*refit,leftWindowSize,rightWindowSize,pit))
			continue;
		for(vSnpIt testit = pit.first; (testit < pit.second) && (testit != testPanel->vsnp.end()); ++testit)
		{
			// Same sequence and (numerically) same position. Only evaluated when one of
			// the two options that care about it is switched on.
			const bool sameSpot = (skipSelf || freePass)
				&& (refit->GetSeq() == testit->GetSeq())
				&& (fabs(refit->GetPos() - testit->GetPos()) < EPSILON);
			if(skipSelf && sameSpot) {
				continue;
			}

			double r2;
			if(freePass && sameSpot) {
				// give a free pass to SNPs mapping to the same spot and assume they have r2=1.
				r2=1;
			} else {
				r2 = CalculateRS(*refit, *testit, unrelated);
			}

			if(storeResult) {
				CSnpPair snpPair;
				snpPair.SetSnpID1(refit->GetSnpID());
				snpPair.SetSeq1(refit->GetSeq());
				snpPair.SetPos1(refit->GetPos());
				snpPair.SetSnpID2(testit->GetSnpID());
				snpPair.SetSeq2(testit->GetSeq());
				snpPair.SetPos2(testit->GetPos());
				snpPair.SetR2(r2);
				result.push_back(snpPair);
			}
			if(printResult) {
				// NOTE(review): setprecision(0) only prints positions as whole numbers when cout
				// is in fixed notation, and this function does not set ios::fixed itself -
				// confirm that the caller does.
				cout << std::setprecision(0);
				cout << refit->GetSnpID() << "\t" << refit->GetPos() << "\t" << testit->GetSnpID() << "\t" << testit->GetPos();
				cout << std::setprecision(4);
				cout << "\t" << r2 << "\n";
			}
		}
	}
}

/*
int affxsnp::ProcessLDBetweenPanels(
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	bool skipSelf,
	bool freePass,
	bool AllPairRsq) 
{
	double callRate=1;
	double callRateHom=1;
	double callRateHet=1;
	double errorRateHom=1;
	double errorRateHet=1;
	if (AllPairRsq)
	{
  		return ProcessLDBetweenPanels(refPanel, testPanel, windowwidth, refSnpIndex, ecdf, result, callRate, callRateHom, callRateHet, errorRateHom, errorRateHet, skipSelf, freePass, AllPairRsq);
	}
	else
	{
		return ProcessLDBetweenPanels(refPanel, testPanel, windowwidth, refSnpIndex, ecdf, result, callRate, callRateHom, callRateHet, errorRateHom, errorRateHet, skipSelf, freePass);
	}
}


int affxsnp::ProcessLDBetweenPanels(
	CSnpPanel& refPanel, 
	CSnpPanel& testPanel, 
	const double& windowwidth, 
	const std::vector<int> &refSnpIndex, 
	vPairDouble& ecdf) 
{
	std::vector<CSnpPair>* result=NULL;
	double callRate=1;
	double callRateHom=1;
	double callRateHet=1;
	double errorRateHom=1;
	double errorRateHet=1;
	bool skipSelf=false;
	bool freePass=false;
  	return ProcessLDBetweenPanels(refPanel, testPanel, windowwidth, refSnpIndex, ecdf, result, callRate, callRateHom, callRateHet, errorRateHom, errorRateHet, skipSelf, freePass);
}
*/


// The current definition (the older overloads around it are kept commented out).
//
// For every reference SNP selected by refSnpIndex, find the test-panel SNP within the
// window that has the highest r^2 with it, and summarise those maxima as an empirical CDF.
//
//   refPanel     panel supplying the reference SNPs and the list of unrelated samples.
//   testPanel    panel searched for neighbouring SNPs.
//   o            options; reads window, skipSelf, freePass, AllPairRsq and resultFileBase.
//   refSnpIndex  offsets into refPanel.vsnp of the reference SNPs to process.
//   ecdf         in: its size selects the number of bins (ECDF_DEFAULT_BIN_SIZE if empty).
//                out: .first = upper bin edge, .second = cumulative fraction of reference
//                SNPs whose best r^2 falls at or below that edge.
//   result       if not NULL, receives one CSnpPair per reference SNP holding its best match
//                (empty ID/seq, position -1 and r^2 0 when nothing scored above 0).
//   vname        only used in the name of the full-table file.
//
// When o.AllPairRsq is set, every evaluated pair is also written to
// "<resultFileBase>.<vname>.full.table.txt".
// Returns the number of reference SNPs processed.
int affxsnp::ProcessLDBetweenPanels(
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	opt2 o,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	std::string vname)
{
	typedef std::map<CSnpInfo, CSnpData, CSnpInfo::ltSnpInfo> SnpCache;

	// This overload exposes no call-rate / error-rate options; every rate is fixed at 1.
	const double callRate=1;
	const double callRateHom=1;
	const double callRateHet=1;
	const double errorRateHom=1;
	const double errorRateHet=1;

	std::fstream tableFH;
	if (o.AllPairRsq) // open the full-table output file
	{
		std::string tableFile = o.resultFileBase + "." + vname + ".full.table.txt";
		tableFH.open(tableFile.c_str(),ios::out);
	}

	// set the window width in front
	CSnpWithin::SetWindowWidth(o.window);

	// set up the ecdf bins - if it is empty use a default of 100 bins, otherwise use whatever length it has been initialized to.
	if(ecdf.size() == 0)
		ecdf.resize(ECDF_DEFAULT_BIN_SIZE);
	const int nBins = ecdf.size();
	const double binSize = 1/double(nBins);
	int i = 0;
	for(i = 0; i < nBins; i ++) {
		ecdf[i].first = (i+1)*binSize;
		ecdf[i].second = 0;
	}

	// Identify indices of unrelated samples (more precisely, samples for which neither parent is included in the panel)
	std::vector <std::pair <unsigned int, std::string> > unrelated;
	refPanel.GetUnrelatedSampleIndex(unrelated);

	int count=0;
	cout << setiosflags(ios::fixed);
	CSnpData refSnp,testSnp;
	// Test SNPs that have already been prepared (rates applied, trios phased), keyed by ID/seq/pos
	SnpCache preDefinedTestSnp;
	for(std::vector<int>::const_iterator refIndexIt = refSnpIndex.begin(); refIndexIt != refSnpIndex.end(); ++refIndexIt)
	{
		// Make a copy of the reference SNP (PhaseTrios below works on the copy)
		refSnp = *(refPanel.vsnp.begin() + *refIndexIt);
		double max_rs=0, max_pos=-1;
		std::string max_seq="", max_snpID="";
		PairedSnpIt pit;

		if(testPanel.GetSNPsNearTo(refSnp,o.window,pit) && (pit.first < pit.second)) {
			refPanel.PhaseTrios(refSnp);
			for(vSnpIt testit = pit.first; (testit < pit.second) && (testit != testPanel.vsnp.end()); ++testit)
			{
				// Same sequence and (numerically) same position. Only evaluated when one of
				// the two options that care about it is switched on.
				const bool sameSpot = (o.skipSelf || o.freePass)
					&& (refSnp.GetSeq() == testit->GetSeq())
					&& (fabs(refSnp.GetPos() - testit->GetPos()) < EPSILON);
				if(o.skipSelf && sameSpot)
					continue;

				double rs;
				if(o.freePass && sameSpot) {
					// give a free pass to SNPs mapping to the same spot and assume they have r2=1.
					rs = 1;
				} else {
					// Check if we already phased (and possibly simulated) this SNP
					CSnpInfo key(testit->GetSnpID(), testit->GetSeq(), testit->GetPos());
					SnpCache::iterator cached = preDefinedTestSnp.find(key);
					if(cached == preDefinedTestSnp.end()) {
						// Haven't encountered this SNP yet, set it up & store it.
						testSnp = *testit;
						testSnp.ApplyCallRate(callRate);
						testSnp.ApplyCallRateHet(callRateHet);
						// NOTE(review): the hom rate goes through ApplyCallRate a second time; an
						// ApplyCallRateHom counterpart may have been intended - confirm against SnpData.h.
						testSnp.ApplyCallRate(callRateHom);
						testSnp.ApplyErrorRate(errorRateHom,errorRateHet);
						testPanel.PhaseTrios(testSnp);
						cached = preDefinedTestSnp.insert(SnpCache::value_type(key,testSnp)).first;
					}
					// Use the cached entry in place rather than copying it out for every pair
					rs = CalculateRS(refSnp, cached->second, unrelated);
				}

				// ID, sequence and position are reported from testit itself. The free-pass
				// branch above never touches testSnp, so reading them from testSnp would
				// report whichever SNP happened to be prepared last.
				if (o.AllPairRsq)
				{
					tableFH << std::setiosflags(ios::fixed);
					tableFH << std::setprecision(0);
					tableFH << refSnp.GetSnpID() << "\t" << refSnp.GetPos() << "\t" << testit->GetSnpID() << "\t"<< testit->GetPos(); 
					tableFH << std::setprecision(4);
					tableFH << "\t" << rs <<"\n";
				}

				if(rs > max_rs) {
					max_rs = rs;
					max_seq = testit->GetSeq();
					max_pos = testit->GetPos();
					max_snpID = testit->GetSnpID();
				}
				// If we find something with r2 of 1 then we can stop.
				// NOTE(review): this also cuts the "full" table short when o.AllPairRsq is set -
				// confirm that is intended.
				if(max_rs > 1-EPSILON)
					break;
			}
		}
		if(result != NULL) {
			CSnpPair snpPair;
			snpPair.SetSnpID1(refSnp.GetSnpID());
			snpPair.SetSeq1(refSnp.GetSeq());
			snpPair.SetPos1(refSnp.GetPos());
			snpPair.SetSnpID2(max_snpID);
			snpPair.SetSeq2(max_seq);
			snpPair.SetPos2(max_pos);
			snpPair.SetR2(max_rs);
			(*result).push_back(snpPair);
		}
		int bin = ((int) ceil(max_rs/binSize)) - 1;
		// make sure that rs = 0 and 1 (or a value pushed marginally outside [0,1] by rounding)
		// don't overstep the bounds
		if(bin < 0) {
			bin = 0;
		} else if(bin >= nBins) {
			bin = nBins - 1;
		}
		assert((bin >= 0) && (bin < nBins));
		ecdf[bin].second += 1;
		count++;
	}

	if (o.AllPairRsq)
	{
		tableFH.close();
	}

	// Now make it into an ECDF by computing cumulative totals and scaling by total count
	double cum_sum=0;
	for(i = 0; i < nBins; i ++) {
		cum_sum += ecdf[i].second;
		ecdf[i].second = cum_sum;
	}
	// With no reference SNPs every bin is already 0; skip the scaling to avoid 0/0
	if(count > 0) {
		for(i = 0; i < nBins; i ++)
			ecdf[i].second /= (double) count;
	}

	return count;
}


/*
int affxsnp::ProcessLDBetweenPanels(//this function record the largest r^2 for each ref SNP, and output CDF
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	const double callRate,
	const double callRateHom,
	const double callRateHet,
	const double errorRateHom,
	const double errorRateHet,
	const bool skipSelf,
	const bool freePass)
{
	// set the window width in front
	CSnpWithin::SetWindowWidth(windowwidth);

	// set up the ecdf bins - if it is empty use a default of 100 bins, otherwise use whatever length it has been initialized to.
	if(ecdf.size() == 0)
		ecdf.resize(ECDF_DEFAULT_BIN_SIZE);
	int nBins = ecdf.size();
	double binSize = 1/double(nBins);
	int i = 0;
	for(i = 0; i < nBins; i ++) {
		ecdf[i].first = (i+1)*binSize;
		ecdf[i].second = 0;
	}

	// Identify indices of unrelated samples (more precisely, samples for which neither parent is included in the panel)
	std::vector <std::pair <unsigned int, std::string> > unrelated;
	refPanel.GetUnrelatedSampleIndex(unrelated);

	int count=0;
	cout << setiosflags(ios::fixed);
	std::vector<int>::const_iterator refIndexIt;
	CSnpData refSnp,testSnp;
	std::map<CSnpInfo, CSnpData, CSnpInfo::ltSnpInfo> preDefinedTestSnp;
	std::map<CSnpInfo, CSnpData, CSnpInfo::ltSnpInfo>::const_iterator preDefinedTestSnp_it;
	for(refIndexIt = refSnpIndex.begin(); refIndexIt != refSnpIndex.end(); refIndexIt++)
	{
		// Make a copy of the reference SNP
		refSnp = *(refPanel.vsnp.begin() + *refIndexIt);
//cout << "#" << refSnp.GetSnpID() << "\n";
		double max_rs=0, rs=0, max_pos=-1;
		std::string max_seq="", max_snpID="";
		PairedSnpIt pit;
//if(refSnp.GetSnpID() == "11511647") {
//	testPanel.GetSNPsNearTo(refSnp,windowwidth,pit,true);
//}
		if(testPanel.GetSNPsNearTo(refSnp,windowwidth,pit) && (pit.first < pit.second)) {
//cout << "# Have some SNPs to check for " << refSnp.GetSnpID() << "\n";
			refPanel.PhaseTrios(refSnp);
			for(vSnpIt testit = pit.first; (testit < pit.second) && (testit != testPanel.vsnp.end()); testit ++)
			{
//cout << "# comparing " << refSnp.GetSnpID() << " with " << testit->GetSnpID() << "\n";
				if(skipSelf && (refSnp.GetSeq() == testit->GetSeq()) && (fabs(refSnp.GetPos() - testit->GetPos()) < EPSILON))
					continue;
				if(freePass && (refSnp.GetSeq() == testit->GetSeq()) && (fabs(refSnp.GetPos() - testit->GetPos()) < EPSILON)) {
					// give a free pass to SNPs mapping to the same spot and assume they have r2=1.
					rs = 1;
				} else {
					// Check if we already phased (and possibly simulated) this SNP
					CSnpInfo key(testit->GetSnpID(), testit->GetSeq(), testit->GetPos());
					if((preDefinedTestSnp_it = preDefinedTestSnp.find(key)) == preDefinedTestSnp.end()) {
						// Haven't encountered this SNP yet, set it up & store it.
						testSnp = *testit;
						testSnp.ApplyCallRate(callRate);
						testSnp.ApplyCallRateHet(callRateHet);
						testSnp.ApplyCallRate(callRateHom);
						testSnp.ApplyErrorRate(errorRateHom,errorRateHet);
						testPanel.PhaseTrios(testSnp);
						preDefinedTestSnp.insert(std::map <CSnpInfo, CSnpData>::value_type(key,testSnp));
					} else {
						// Already set up this SNP, retrieve the precomputed data
						testSnp = preDefinedTestSnp_it->second;
					}
					rs = CalculateRS(refSnp, testSnp, unrelated);
				}
				if(rs > max_rs) {
					max_rs = rs;
					max_seq = testSnp.GetSeq();
					max_pos = testSnp.GetPos();
					max_snpID = testSnp.GetSnpID();
				}
				//if(refSnp.GetSnpID() == "11511647") {
				//	refSnp.print();
				//	testit->print();
				//	testSnp.print();
				//	cout << std::setprecision(0);
				//	cout << refSnp.GetSnpID() << "\t" << refSnp.GetSeq() << "\t" << refSnp.GetPos() << "\t" << testSnp.GetSnpID() << "\t" << testSnp.GetSeq() << "\t" << testSnp.GetPos();
				//	cout << std::setprecision(4);
				//	cout << "\t" << rs << "\n";
				//	cout << "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn\n";
				//}
				// If we find something with r2 of 1 then we can stop
				if(max_rs > 1-EPSILON)
					break;
			}
		}
		if(result != NULL) {
			CSnpPair snpPair;
			snpPair.SetSnpID1(refSnp.GetSnpID());
			snpPair.SetSeq1(refSnp.GetSeq());
			snpPair.SetPos1(refSnp.GetPos());
			snpPair.SetSnpID2(max_snpID);
			snpPair.SetSeq2(max_seq);
			snpPair.SetPos2(max_pos);
			snpPair.SetR2(max_rs);
			(*result).push_back(snpPair);
		}
		int bin = ((int) ceil(max_rs/binSize)) - 1;
		// make sure that rs = 0 and 1 don't overstep the bounds
		if(bin == -1) {
			bin++;
		} else if(bin == nBins) {
			bin--;
		}
		assert((bin >= 0) && (bin < nBins));
		ecdf[bin].second += 1;
		count++;
	}

	// Now make it into an ECDF by computing cumulative totals and scaling by total count
	double cum_sum=0;
	for(i = 0; i < nBins; i ++) {
		cum_sum += ecdf[i].second;
		ecdf[i].second = cum_sum;
	}
	for(i = 0; i < nBins; i ++)
		ecdf[i].second /= (double) count;

	return count;
}

int affxsnp::ProcessLDBetweenPanels(//this function record all pairwise r^2
	CSnpPanel& refPanel,
	CSnpPanel& testPanel,
	const double& windowwidth,
	const std::vector<int> &refSnpIndex,
	vPairDouble& ecdf,
	std::vector<CSnpPair>* result,
	const double callRate,
	const double callRateHom,
	const double callRateHet,
	const double errorRateHom,
	const double errorRateHet,
	const bool skipSelf,
	const bool freePass,
	bool AllPairRsq)
{
	// set the window width in front
	CSnpWithin::SetWindowWidth(windowwidth);

	// set up the ecdf bins - if it is empty use a default of 100 bins, otherwise use whatever length it has been initialized to.
	if(ecdf.size() == 0)
		ecdf.resize(ECDF_DEFAULT_BIN_SIZE);
	int nBins = ecdf.size();
	double binSize = 1/double(nBins);
	int i = 0;
	for(i = 0; i < nBins; i ++) {
		ecdf[i].first = (i+1)*binSize;
		ecdf[i].second = 0;
	}

	// Identify indices of unrelated samples (more precisely, samples for which neither parent is included in the panel)
	std::vector <std::pair <unsigned int, std::string> > unrelated;
	refPanel.GetUnrelatedSampleIndex(unrelated);

	int count=0;
	cout << setiosflags(ios::fixed);
	std::vector<int>::const_iterator refIndexIt;
	CSnpData refSnp,testSnp;
	std::map<CSnpInfo, CSnpData, CSnpInfo::ltSnpInfo> preDefinedTestSnp;
	std::map<CSnpInfo, CSnpData, CSnpInfo::ltSnpInfo>::const_iterator preDefinedTestSnp_it;
	for(refIndexIt = refSnpIndex.begin(); refIndexIt != refSnpIndex.end(); refIndexIt++)
	{
		// Make a copy of the reference SNP
		refSnp = *(refPanel.vsnp.begin() + *refIndexIt);
//cout << "#" << refSnp.GetSnpID() << "\n";
		double max_rs=0, rs=0, max_pos=-1;
		std::string max_seq="", max_snpID="";
		PairedSnpIt pit;
//if(refSnp.GetSnpID() == "11511647") {
//	testPanel.GetSNPsNearTo(refSnp,windowwidth,pit,true);
//}
		if(testPanel.GetSNPsNearTo(refSnp,windowwidth,pit) && (pit.first < pit.second)) {
//cout << "# Have some SNPs to check for " << refSnp.GetSnpID() << "\n";
			refPanel.PhaseTrios(refSnp);
			for(vSnpIt testit = pit.first; (testit < pit.second) && (testit != testPanel.vsnp.end()); testit ++)
			{
//cout << "# comparing " << refSnp.GetSnpID() << " with " << testit->GetSnpID() << "\n";
				if(skipSelf && (refSnp.GetSeq() == testit->GetSeq()) && (fabs(refSnp.GetPos() - testit->GetPos()) < EPSILON))
					continue;
				if(freePass && (refSnp.GetSeq() == testit->GetSeq()) && (fabs(refSnp.GetPos() - testit->GetPos()) < EPSILON)) {
					// give a free pass to SNPs mapping to the same spot and assume they have r2=1.
					rs = 1;
				} else {
					// Check if we already phased (and possibly simulated) this SNP
					CSnpInfo key(testit->GetSnpID(), testit->GetSeq(), testit->GetPos());
					if((preDefinedTestSnp_it = preDefinedTestSnp.find(key)) == preDefinedTestSnp.end()) {
						// Haven't encountered this SNP yet, set it up & store it.
						testSnp = *testit;
						testSnp.ApplyCallRate(callRate);
						testSnp.ApplyCallRateHet(callRateHet);
						testSnp.ApplyCallRate(callRateHom);
						testSnp.ApplyErrorRate(errorRateHom,errorRateHet);
						testPanel.PhaseTrios(testSnp);
						preDefinedTestSnp.insert(std::map <CSnpInfo, CSnpData>::value_type(key,testSnp));
					} else {
						// Already set up this SNP, retrieve the precomputed data
						testSnp = preDefinedTestSnp_it->second;
					}
					rs = CalculateRS(refSnp, testSnp, unrelated);
				}
				if(rs > max_rs) {
					max_rs = rs;
					max_seq = testSnp.GetSeq();
					max_pos = testSnp.GetPos();
					max_snpID = testSnp.GetSnpID();
				}
				if(max_rs > 1-EPSILON)
					break;
			}
		}
		if(result != NULL) {
			CSnpPair snpPair;
			snpPair.SetSnpID1(refSnp.GetSnpID());
			snpPair.SetSeq1(refSnp.GetSeq());
			snpPair.SetPos1(refSnp.GetPos());
			snpPair.SetSnpID2(max_snpID);
			snpPair.SetSeq2(max_seq);
			snpPair.SetPos2(max_pos);
			snpPair.SetR2(max_rs);
			(*result).push_back(snpPair);
		}
		int bin = ((int) ceil(max_rs/binSize)) - 1;
		// make sure that rs = 0 and 1 don't overstep the bounds
		if(bin == -1) {
			bin++;
		} else if(bin == nBins) {
			bin--;
		}
		assert((bin >= 0) && (bin < nBins));
		ecdf[bin].second += 1;
		count++;
	}

	// Now make it into an ECDF by computing cumulative totals and scaling by total count
	double cum_sum=0;
	for(i = 0; i < nBins; i ++) {
		cum_sum += ecdf[i].second;
		ecdf[i].second = cum_sum;
	}
	for(i = 0; i < nBins; i ++)
		ecdf[i].second /= (double) count;

	return count;
}
*/

// Load a panel from a panel file without progress output.
// Forwards to the two-argument overload with verbose switched off.
void affxsnp::CSnpPanel::Read(std::string panelFile) {
	const bool verbose = false;
	Read(panelFile, verbose);
}

// Load a panel described by a panel file.
//
// The panel file is read as whitespace-separated triples "seq ped info". The first triple
// is loaded with Read(ped, info, seq) and every further one is appended with
// Add(ped, info, seq). Reading stops at the first triple that cannot be extracted in
// full, so an incomplete trailing entry is ignored.
//
//   panelFile  path of the panel file.
//   verbose    when true, prints the number of entries and the number of SNPs contributed
//              by each entry to cout.
//
// Throws std::string if the panel file cannot be opened.
void affxsnp::CSnpPanel::Read(std::string panelFile, bool verbose) {
// TODO:
// This function starts to get a lot slower once the first few batches have been read in, presumably
// because of the push_back() being called on every added SNP.  It could be made faster, perhaps
// significantly so, by doing a first-pass through all the info and ped files to determine the max number
// of SNPs and samples, then allocating all memory, filling it, and trimming back vector size as needed.

	std::ifstream panelFh(panelFile.c_str(), std::ios_base::in);
	if(!panelFh)
		throw std::string("Problem opening panel file ") + panelFile + "\n";

	// Collect every complete (seq, ped, info) triple
	std::vector<PANEL_FILES> inFile;
	PANEL_FILES f;
	std::string seq,ped,info;
	while(panelFh >> seq >> ped >> info) {
		f.seq = seq;
		f.ped = ped;
		f.info = info;
		inFile.push_back(f);
	}
	panelFh.close();

	if(verbose)
		cout << "#  Panel consists of " << inFile.size() << " entries\n";

	if(inFile.size() > 0) {
		// The first entry initialises the panel ...
		Read(inFile[0].ped,inFile[0].info,inFile[0].seq);
		unsigned int n = GetNumberSnps();
		if(verbose) {
			cout << setiosflags(ios::fixed);
			cout << std::setprecision(0);
			cout << "#    " << inFile[0].seq << ":\t" << n << "\n";
		}
		// ... and the remaining entries are appended; report how many SNPs each one added
		for(unsigned int i=1; i<inFile.size(); i++) {
			Add(inFile[i].ped,inFile[i].info,inFile[i].seq);
			const unsigned int n_new = GetNumberSnps();
			if(verbose)
				cout << "#    " << inFile[i].seq << ":\t" << (n_new - n) << "\n";
			n = n_new;
		}
	}
}

// Build a SNP descriptor from its identifier (i), sequence name (s) and position (p).
CSnpInfo::CSnpInfo(std::string i, std::string s, double p) {
	this->pos = p;
	this->seq = s;
	this->snpID = i;
}

// multiple marker mode sub
void HaplotypeData::SetAlleles() 
{
	std::vector<Haplotype>::const_iterator g;
	int n_alleles_found = 0;
	int n_allele1=0;
	int n_allele2=0;
	for(g = haplo.begin(); g != haplo.end(); g++) 
	{
		if(n_alleles_found == 0) 
		{
			// process the 1st allele
			if(g->GetHap1() != '0') 
			{
				allele1 = g->GetHap1();
				n_alleles_found=1;
				n_allele1 = 1;
			}
			
			// process the 2nd allele
			if (g->GetHap2() != '0') 
			{
				if(n_alleles_found == 0) 
				{
					allele1 = g->GetHap2();
					n_alleles_found=1;
					n_allele1 = 1;
				} 
				else if(g->GetHap2() != allele1) 
				{
					allele2 = g->GetHap2();
					n_alleles_found=2;
					n_allele2 = 1;
				}
				else if(g->GetHap2() == allele1) 
				{
					n_allele1++;
				}
			}
		} 
		else if(n_alleles_found == 1) 
		{
			if((g->GetHap1() != '0') && (g->GetHap1() != allele1)) 
			{
				allele2 = g->GetHap1();
				n_alleles_found=2;
				n_allele2 = 1;
				/*if(g->GetHap2() == allele1) 
				{
					n_allele1++;
				} 
				else if(g->GetHap2() == allele2) 
				{
					n_allele2++;
				} 
				else if (g->GetHap2() != '0') {
					throw "inconsistent alleles found\n";
				}*/
			}
			else if((g->GetHap1() != '0') && (g->GetHap1() == allele1)) 
			{
				n_allele1++;
			} 
			
			// process the 2nd allele
			if(n_alleles_found == 1) 
			{
				if((g->GetHap2() != '0') && (g->GetHap2() != allele1)) 
				{
					allele2 = g->GetHap2();
					n_alleles_found=2;
					n_allele2 = 1;
				}
				else if((g->GetHap2() != '0') && (g->GetHap2() == allele1))
				{
					n_allele1++;
				}
			}
			else if(n_alleles_found == 2) 
			{
				if(g->GetHap2() == allele1) 
				{
					n_allele1++;
				} 
				else if(g->GetHap2() == allele2) 
				{
					n_allele2++;
				} 
				else if (g->GetHap2() != '0') 
				{
					throw "inconsistent alleles found\n";
				}
			}
			/*else if((g->GetHap2() != '0') && (g->GetHap2() != allele1)) 
			{
				allele2 = g->GetHap2();
				n_alleles_found=2;
				n_allele2 = 1;
			}*/
		} 
		else // when (n_alleles_found == 2)
		{
			if(g->GetHap1() == allele1) 
			{
				n_allele1++;
			} 
			else if(g->GetHap1() == allele2) 
			{
				n_allele2++;
			} 
			else if(g->GetHap1() != '0') {
				throw "Unexpected value for first allele\n";
			}

			if(g->GetHap2() == allele1) 
			{
				n_allele1++;
			} 
			else if(g->GetHap2() == allele2) 
			{
				n_allele2++;
			} 
			else if(g->GetHap2() != '0') 
			{
				throw "Unexpected value for first allele\n";
			}
		}
	}
	if(n_allele1 < n_allele2) {
		char temp_char = allele1;
		allele1 = allele2;
		allele2 = temp_char;
		int temp_int = n_allele1;
		n_allele1 = n_allele2;
		n_allele2 = temp_int;
	}
	maf = (n_allele1 > 0) ? ( ((double) n_allele2) / ((double) n_allele1 + n_allele2) ) : 0;

	return;
}

void CSnpPanel::BroadRead(std::string pedFile, std::string infoFile) {
	// See <http://www.broad.mit.edu/mpg/haploview/files.php#formats> for details of the .ped and .info files to be parsed.
	// read info file first so that we know how many snps we are handling
	ReadBroadInfoFile(infoFile);

	// read the ped file second
	ReadBroadPedFile(pedFile);

	// determine the alleles for each SNP
	std::vector<HaplotypeData>::iterator s;
	int count=0;
	for(s = vhaplo.begin(); s != vhaplo.end(); s++){
		s->SetAlleles();
 		count++;
	}

	// sort the info: seq then pos then snpid
	//std::sort(vhaplo.begin(), vhaplo.end(), CSnpLess());

	return;
}

void CSnpPanel::OxfordRead(std::string pedFile, std::string infoFile) {
	// See <http://www.hapmap.org/downloads/phasing/2005-03_phaseI/> for details 
	ReadOxfordInfoFile(infoFile);

	ReadOxfordPedFile(pedFile);

	// determine the alleles for each SNP
	std::vector<HaplotypeData>::iterator s;
	int count=0;
	for(s = vhaplo.begin(); s != vhaplo.end(); s++){
		s->SetAlleles();
 		count++;
	}
	return;
}

void CSnpPanel::ReadBroadInfoFile(std::string infoFile)
{
	// clear the contents if there is any
	vhaplo.clear();

	// assuming no column headers
	// columns were speicified at <http://www.broad.mit.edu/mpg/haploview/files.php#formats>.
	std::ifstream fin(infoFile.c_str(), std::ios_base::in);
	if(!fin)
	{
		std::string errmsg = "Can not open info file :"+ infoFile + ", make sure the file exist.";
		throw errmsg;
	}

	while(fin.good())
	{
		HaplotypeData snp;
		if(!(fin >> snp.snpID >> snp.pos >> snp.target >> snp.predictor)) 
			continue; // skip blank lines
		vhaplo.push_back(snp);
	}

	fin.close();
	if(vhaplo.size() == 0)  
		throw "Invalid info file format!";
}

// Read an Oxford/HapMap phasing marker file into vhaplo, replacing whatever it held before.
//
// The columns are specified at <http://www.hapmap.org/downloads/phasing/2005-03_phaseI/>.
// The first three tokens of the file are read and discarded (taken to be the header);
// every following record is read as a position followed by two single characters, which
// are not stored. Each SNP gets the synthetic ID "SNP.No.<position>" and has target and
// predictor set to 1. Reading ends at the first record that cannot be extracted in full.
//
// Throws std::string if the file cannot be opened, and a C string if no record was read.
void CSnpPanel::ReadOxfordInfoFile(std::string infoFile)
{
	// clear the contents if there is any
	vhaplo.clear();

	std::ifstream fin(infoFile.c_str(), std::ios_base::in);
	if(!fin)
	{
		std::string errmsg = "Can not open info file :"+ infoFile + ", make sure the file exist.";
		throw errmsg;
	}

	// Discard the leading three tokens
	std::string temp;
	char temp1, temp2;
	fin >> temp >> temp1 >> temp2; 

	for(;;)
	{
		HaplotypeData snp;		
		if(!(fin >> snp.pos >> temp1 >> temp2)) 
			break; // nothing more to read (blank lines are plain whitespace to operator>>)

		// Cast explicitly so the argument always matches the conversion specifier,
		// whatever type pos is declared with; handing a non-int to "%d" is undefined.
		// 7 characters of prefix plus at most 20 for a 64-bit long fit in the buffer.
		char num[30];
		sprintf(num,"SNP.No.%ld",static_cast<long>(snp.pos));
		snp.snpID = num;
		snp.target=1;
		snp.predictor=1;
		vhaplo.push_back(snp);
	}

	fin.close();
	if(vhaplo.size() == 0)  
		throw "Invalid info file format!";
}

void CSnpPanel::ReadBroadPedFile(std::string pedFile)
{
	// clear the contents if there is any
	// Here is the defination of sample
	// std::map <std::pair<std::string,std::string>, CSampleData> sample;
	//sample.clear();

	// assuming no column headers
	// columns were speicified at <http://www.broad.mit.edu/mpg/haploview/files.php#formats>.
	std::ifstream fin(pedFile.c_str(), std::ios_base::in);
	if(!fin)
	{
		std::string errmsg = "Can not open ped file :"+ pedFile + ", make sure the file exist.";
		throw errmsg;
	}

	int nNumSnps = vhaplo.size();

	// the estimated length of line
	int nSamples = 0;
	std::string stream;
	while(fin.good())
	{
		if(!std::getline(fin, stream,'\n')) 
			continue; // skip the blank lines
		if(fin.fail()) 
			throw "Error in reading ped file";
		nSamples++;
	}
	fin.close();
	nSamples = nSamples/2;

	std::string strPedId;
	char strand, allele;
	
	//  will come back to this later
	//  fin.seekg(0,std::ios_base::beg);	
	//	long pos = fin.tellg();
	std::ifstream fin2(pedFile.c_str(), std::ios_base::in);

	for(int s = 0; s < nSamples; s++)
	{
		//Read the 1st Chromosome
		if(!(fin2 >> strPedId))
			continue; // skip blank lines		
		if(fin2.fail()) 
			throw "Error in reading ped file";
		if(!(fin2 >> strand))
			throw "Ped file and info file do not match";
		for(int i = 0; i < nNumSnps; i ++)
		{
			if(s == 0) vhaplo[i].haplo.resize(nSamples);
			if(!(fin2 >> allele))
				throw "Ped file and info file do not match";
			if (allele=='h') {allele='0';}
			vhaplo[i].haplo[s].SetHap1(allele);
		}

		//Read the 2nd Chromosome
		if(!(fin2 >> strPedId))
			continue; // skip blank lines		
		if(fin2.fail()) 
			throw "Error in reading ped file";
		if(!(fin2 >> strand))
			throw "Ped file and info file do not match";
		for(int i = 0; i < nNumSnps; i ++)
		{
			if(!(fin2 >> allele))
				throw "Ped file and info file do not match";
			if (allele=='h') {allele='0';}
			vhaplo[i].haplo[s].SetHap2(allele);
		}
	}

	fin2.close();
	//if(sample.size() == 0) throw "Invalid ped file format!";
}

void CSnpPanel::ReadOxfordPedFile(std::string pedFile)
{
	// assuming no column headers
	// columns were speicified at <http://www.hapmap.org/downloads/phasing/2005-03_phaseI/>.
	std::ifstream fin(pedFile.c_str(), std::ios_base::in);
	if(!fin)
	{
		std::string errmsg = "Can not open ped file :"+ pedFile + ", make sure the file exist.";
		throw errmsg;
	}

	int nNumSnps = vhaplo.size();

	// the estimated length of line
	int nSamples = 0;
	std::string stream;
	while(fin.good())
	{
		if(!std::getline(fin, stream,'\n')) 
			continue; // skip the blank lines
		if(fin.fail()) 
			throw "Error in reading ped file";
		nSamples++;
	}
	fin.close();
	nSamples = nSamples/2;

	std::ifstream fin2(pedFile.c_str(), std::ios_base::in);
	char allele;
	for(int s = 0; s < nSamples; s++)
	{
		for(int i = 0; i < nNumSnps; i ++)
		{
			if(s == 0) vhaplo[i].haplo.resize(nSamples);
			if(!(fin2 >> allele))
				throw "Ped file and info file do not match";
			if (allele=='0') {allele='2';}
			vhaplo[i].haplo[s].SetHap1(allele);
		}

		for(int i = 0; i < nNumSnps; i ++)
		{
			if(!(fin2 >> allele))
				throw "Ped file and info file do not match";
			if (allele=='0') {allele='2';}
			vhaplo[i].haplo[s].SetHap2(allele);
		}
	}
	fin2.close();
}

/**
 * Set both haplotypes of the sample at index i.
 *
 * @param i   zero-based index into haplo
 * @param h1  value passed to SetHap1()
 * @param h2  value passed to SetHap2()
 * @throws const char*  when i is not a valid index into haplo
 *
 * Exceptions thrown by SetHap1()/SetHap2() propagate unchanged; if SetHap1()
 * throws, SetHap2() is not called.
 */
void HaplotypeData::SetHaplo(unsigned int i, char h1, char h2) {
	// i is unsigned, so only the upper bound needs checking
	if(i >= haplo.size()) {
		throw "Out of bound in sample index for which to set haplotype\n";
	}
	haplo[i].SetHap1(h1);
	haplo[i].SetHap2(h2);
}
