from omics_api_utils import *

def setVariables(keep_unit=None, plink_path=None,loglevel=None,unit_converter=None,r_path=None, imgsize_large=None, imgsize_xlarge=None,imgsize_xlarge_in=None, imgsize_small=None):
	'''
	Set internal settings/paths.

	keep_unit 	string: unit to use for phenotypes; all compatible units are converted to this unit
	plink_path 	string: path to local plink executable
	r_path 		string: path to local Rscript executable
	loglevel 	int: internal verbosity of the API; obtain the values with getSetting('SETTING')
	unit_converter, imgsize_large, imgsize_xlarge, imgsize_xlarge_in, imgsize_small
				forwarded unchanged (not documented in this module)

	Every argument is handed to setVariables0 under the same name prefixed with 'my_'.
	Nothing is returned.
	'''
	setVariables0(
		my_keep_unit=keep_unit,
		my_plink_path=plink_path,
		my_loglevel=loglevel,
		my_unit_converter=unit_converter,
		my_r_path=r_path,
		my_imgsize_large=imgsize_large,
		my_imgsize_xlarge=imgsize_xlarge,
		my_imgsize_xlarge_in=imgsize_xlarge_in,
		my_imgsize_small=imgsize_small,
	)
	
def getSetting(varname):
	'''
	Return the internal setting named varname (looked up by getSetting0).

	Setting names listed for this API:
	SHOW_ALL, SHOW_DEBUG, SHOW_ERRORONLY 	(presumably the loglevel values for setVariables - confirm)
	TYPE_SNP, TYPE_EXP, TYPE_PHEN 			(datatype identifiers used by the analysis tools)
	'''
	value=getSetting0(varname)
	return value



def get_genes_by_accession(acclist,annot='all'):
	'''
	Get genes by accession.

	acclist 	list[]: list of accessions
	annot 		string: annotation to use, forwarded to get_genes_by_accession0 (default 'all')

	Returns whatever get_genes_by_accession0 returns; index_col is always passed as None.
	'''
	genes=get_genes_by_accession0(acclist, annot=annot, index_col=None)
	return genes

def get_genes_by_position(poslist,annot='all'):
	'''
	Get genes by position.

	poslist 	list[]: list of positions (contig:start-stop or contig:position format)
	annot 		string: annotation to use, forwarded to get_genes_by_position0 (default 'all')

	Returns whatever get_genes_by_position0 returns; index_col is always passed as None.
	'''
	# the result must be returned; previously it was computed and discarded, so callers always got None
	return get_genes_by_position0(poslist,annot=annot,index_col=None)

def read_url(myurl,urlabel,requery=False):
	'''
	Read ICGRC API server (https://icgrc.info/api_doc)

	myurl 		string: URL
	urlabel 	string: job name to use, should be unique and descriptive for each query URL
	requery 	bool: every new urlabel is cached; set to True to force a requery from the server and replace any existing cache

	Returns the result of read_url0 for these arguments.
	'''
	result=read_url0(myurl, urlabel, requery=requery)
	return result


'''
Tools for plotting value distributions across multiple variables and multiple sources
'''

def plot_histograms(df, xlabel, dslabel):
	'''
	Plot the distribution of each column in the dataframe.

	df 			pandas dataframe
	xlabel 		string: x description
	dslabel 	string: y description

	Returns the result of plot_histograms0.
	'''
	result=plot_histograms0(df, xlabel, dslabel)
	return result


def plot_histograms_multi(dfs, xunit,dslabels,phenminmaxbin=None,heatmap=False, maporder='phen-source',label2color=dict(),nbins=10,bydataset=None):
	'''
	Plot the distribution of multiple dataframes, one plot for each unique column name.
	The different dataframes are shown as different color bars in the plots.

	dfs 			list[] of pandas dataframes
	xunit 			string: x description
	dslabels 		list[] of string: list of url_labels
	phenminmaxbin 	dict['trait']=(min, max, None): minimum and maximum x range for trait 'trait'
	label2color 	dict['url_labels']='color': color of the bars for dataset url_labels
	nbins 			int: number of bins
	heatmap, maporder, bydataset
					forwarded unchanged to plot_histograms_multi0 (not documented in this module)

	Returns the result of plot_histograms_multi0.

	NOTE(review): label2color defaults to one shared dict instance; this is safe only
	as long as plot_histograms_multi0 does not modify it - confirm.
	'''
	return plot_histograms_multi0(
		dfs,
		xunit,
		dslabels,
		phenminmaxbin=phenminmaxbin,
		heatmap=heatmap,
		maporder=maporder,
		label2color=label2color,
		nbins=nbins,
		# forward the caller's value; it used to be hard-coded to None, which ignored the argument
		bydataset=bydataset,
	)

def get_phenotypes(df):
	'''
	Get the phenotypes in the dataframe df (result of get_phenotypes0).
	'''
	phenotypes=get_phenotypes0(df)
	return phenotypes
	
def convert_units_to(df, datatype='3 PHEN', to_unit=None):
	'''
	Convert the units for 'datatype' in 'df' to unit 'to_unit'.

	datatype can be
	getSetting('TYPE_PHEN')
	getSetting('TYPE_EXP')
	getSetting('TYPE_SNP')

	Returns the result of convert_units_to0.
	'''
	converted=convert_units_to0(df, datatype=datatype, to_unit=to_unit)
	return converted

''' 
Tools for WGCNA expression-phenotype analysis

# requires R with the WGCNA package
# based on R scripts from https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/index.html
'''

def do_wgcna_prepare(df,dflabel,datatypex='4 EXP',datatypey='3 PHEN',recalc=False,cortopn=None,maxp=1e-10,mincor=0.8,exp_file=None):
	'''
	Generates the Modules-Traits relation heatmap, based on the WGCNA tutorial steps
	1. Data input and cleaning
	2.a Automatic, one-step network construction and module detection
	3.1 Relating modules to external clinical traits (first half of step 3)

	df 			pandas: dataframe with at least two datatypes, i.e. getSetting('TYPE_EXP') and getSetting('TYPE_PHEN'), sharing common samples
	dflabel 	string: unique job label
	datatypex 	getSetting('TYPE_EXP')
	datatypey 	getSetting('TYPE_PHEN')
	recalc 		boolean: the result is cached using the dflabel label; set to True to redo the calculations and replace the cache
	exp_file 	string: path of the expression matrix file for large (whole transcriptome) data, as downloaded from api/user/expression/list

	It generates two matrices of module x trait, and their correlation heatmap in png and pdf formats:
	dflabel.ModuleTraitRelationship.cor.txt 		correlations
	dflabel.ModuleTraitRelationship.corPvalue.txt 	p-values of the correlations
	dflabel.ModuleTraitRelationship.pdf
	dflabel.ModuleTraitRelationship.png

	The top pairs that meet the cutoffs are returned:
	cortopn 	int: top N pairs sorted by absolute correlation
	maxp 		float: maximum p-value (handed to do_wgcna_prepare0 as maxP)
	mincor 		float: minimum absolute correlation

	The return is a list of tuples (module,trait,corr,corr p-value)
	'''
	return do_wgcna_prepare0(
		df,
		dflabel,
		datatypex=datatypex,
		datatypey=datatypey,
		recalc=recalc,
		cortopn=cortopn,
		maxP=maxp,
		mincor=mincor,
		exp_file=exp_file,
	)

def do_wgcna_modulesgenes(mydf,dflabel,datatypex='4 EXP',datatypey='3 PHEN',recalc=False,moduletrait=['brown,Total_cannabinoids'],annotfle="cs10-genes3.csv",gstopn=None,maxp=1e-10,mings=0.8,annotate=False):
	'''
	Get gene-trait significance, based on the 2nd half of WGCNA tutorial step 3, Identifying important genes.

	mydf 			pandas: dataframe with at least two datatypes, i.e. getSetting('TYPE_EXP') and getSetting('TYPE_PHEN'), sharing common samples
	dflabel 		string: unique job label
	datatypex 		getSetting('TYPE_EXP')
	datatypey 		getSetting('TYPE_PHEN')
	recalc 			boolean: the result is cached using the dflabel label; set to True to redo the calculations and replace the cache
	moduletrait 	list of 'module,trait' strings: limit only to this list of module-trait
	annotfle 		filename of gene annotations, a file with tab-separated GENE, EXTERNAL_GENE_ID, GENE columns
					(not used here since the annotation is queried by web-service, but it is required by the R script.
					This file may be generated on the fly from just the list of genes.)
	annotate 		bool: if True, annotate the transcript genes from the web-service api/user/gene

	The R script generates the gene significance (GS) list for all genes for each trait, dflabel-TRAIT-geneInfo.csv
	All gene-trait-GS-pGS from all traits are combined, sorted and filtered according to the cut-offs:
	gstopn 		int: top N pairs sorted by absolute GS
	maxp 		float: maximum p-value (handed to do_wgcna_modulesgenes0 as maxP)
	mings 		float: minimum absolute Gene Significance

	It generates the list of top gene-traits with GS score and p-value, and gene annotations in the file
	dflabel.relateModsToExt.top.genetrait.annot.tsv

	And the GS and pGS value distributions for all module-traits considered in the files
	dflabel.relateModsToExt.GS.png
	dflabel.relateModsToExt.pGS.png

	The return is a dataframe of top gene-trait pairs with columns ["expgene","trait","GS","pGS"]
	Generates output file [dflabel].wgcna.top.tsv
	'''
	return do_wgcna_modulesgenes0(
		mydf,
		dflabel=dflabel,
		datatypex=datatypex,
		datatypey=datatypey,
		recalc=recalc,
		moduletrait=moduletrait,
		annotfle=annotfle,
		gstopn=gstopn,
		maxP=maxp,
		mings=mings,
		annotate=annotate,
	)


def do_wgcna_visualize(mydf,dflabel,datatypex='4 EXP',datatypey='3 PHEN',recalc=False, moduletrait=['brown,Total_cannabinoids'],exportNetwork=False):
	'''
	Generates dendrograms and GS heatmaps for each module-trait considered, based on WGCNA tutorial step 5,
	Network visualization using WGCNA functions.

	mydf 			pandas: dataframe with at least two datatypes, i.e. getSetting('TYPE_EXP') and getSetting('TYPE_PHEN'), sharing common samples
	dflabel 		string: unique job label
	datatypex 		getSetting('TYPE_EXP')
	datatypey 		getSetting('TYPE_PHEN')
	recalc 			boolean: the result is cached using the dflabel label; set to True to redo the calculations and replace the cache
	moduletrait 	list of 'module,trait' strings: limit only to this list of module-trait
	exportNetwork 	if True, generates the network in VisANT and Cytoscape formats, based on tutorial step 6, Export of networks to external software

	Returns the result of do_wgcna_visualize0.
	'''
	return do_wgcna_visualize0(
		mydf,
		dflabel=dflabel,
		datatypex=datatypex,
		datatypey=datatypey,
		recalc=recalc,
		moduletrait=moduletrait,
		exportNetwork=exportNetwork,
	)


'''
Tools for SNP-phenotype association analysis using the plink --assoc feature
'''

def do_gwas(df_snpphen, dflabel,unit,phenotypes=None,assocmethod='qassoc.counts',rerunplink=True,maxp=1e-10,plink_file=None,annot=None,recalc=False,df_genes=None,covar=None,plotgwas=True, plotgwasi=False,pfilter=None):
	'''
	Performs association on each phenotype.

	df_snpphen 		pandas: dataframe with getSetting('TYPE_PHEN') and getSetting('TYPE_SNP') datatypes, all values should have the same unit
	dflabel 		string: unique job label
	phenotypes 		list of phenotypes to include
	assocmethod 	string: association method to use based on plink options, valid values [qassoc.counts, linear, logistic]
	rerunplink 		bool: if True, regenerate the plink binary from the df SNPs
	maxp 			float: max p-value cutoff
	plink_file 		string: path to plink file (binary bed,bim) for large (whole genome) analysis. Plink file available for download in the site
	annot 			string: annotation to use for the snp genes (Csativa,cs10,pkfdv1,fnfdv1,all) from api/user/gene. Set to None to not annotate
	pfilter 		float: minimum p-value to report in plink assoc
	plotgwasi 		bool: output a Manhattan plot file per phenotype
	plotgwas 		bool: output a Manhattan plot file with all phenotypes
	unit, recalc, df_genes, covar
					forwarded unchanged to do_gwas0 (not documented in this module)

	This returns a dataframe of top snp-trait pairs
	with columns ["trait","snp","pvalue"]
	Generates output file [dflabel].gwas.top.tsv
	'''
	return do_gwas0(
		df_snpphen,
		dflabel,
		unit,
		phenotypes=phenotypes,
		assocmethod=assocmethod,
		rerunplink=rerunplink,
		maxp=maxp,
		plink_file=plink_file,
		annot=annot,
		recalc=recalc,
		df_genes=df_genes,
		covar=covar,
		plotgwas=plotgwas,
		plotgwasi=plotgwasi,
		pfilter=pfilter,
	)


def do_clustering(df_snp,dflabel,group,convert_group=dict(),K=2,heatmap=False,plink_file=None,plink_extract=[],recalc=False,fastapath=None):
	'''
	Perform clustering of genotypes, and measure the distance to the provided samples grouping.
	Cluster distance is measured by Variation of Information (VI), Normalized VI, and Chi squared from contingency table statistics and p-value.

	df_snp 			pandas: dataframe with getSetting('TYPE_SNP') and getSetting('TYPE_PROP') with property 'group'
	dflabel 		string: unique job label
	group 			string: property to group by
	convert_group 	dict: convert original group names if necessary
	K 				int: number of clusters
	heatmap 		bool: draw phylogenetic tree and genotype matrix as heatmap
	plink_file 		string: use plink file if provided
	plink_extract 	list: when plink is used, list of tuples of genomic regions (contig,start,stop)
	fastapath 		string: path to reference genome fasta (for heatmap coloring)
	recalc 			forwarded unchanged to do_clustering0 (not documented in this module)

	Returns the result of do_clustering0.
	'''
	return do_clustering0(
		df_snp,
		dflabel,
		group,
		convert_group=convert_group,
		K=K,
		heatmap=heatmap,
		plink_file=plink_file,
		plink_extract=plink_extract,
		recalc=recalc,
		fastapath=fastapath,
	)


def do_phylo(df_snpphen, dflabel, plink_file=None,recalc=False,rerunplink=False,K=2,cluster_group=None,convert_group=None,plink_extract=None,heatmap=False,drawtree=False,fastapath=None):
	'''
	Same as do_clustering, but performs pre-filtering first.

	All arguments are forwarded unchanged, by keyword, to do_phylo0 and its result is returned.

	Example of a convert_group mapping:
	{'BLDT':'Drug','NLDT':'Drug',',unknown':'unknown','Hemp-type':'Hemp','Drug-type':'Drug','Drug type feral':'Drug','Type I':'Drug','Type III':'Hemp'}
	'''
	return do_phylo0(
		df_snpphen,
		dflabel,
		plink_file=plink_file,
		recalc=recalc,
		rerunplink=rerunplink,
		K=K,
		cluster_group=cluster_group,
		convert_group=convert_group,
		plink_extract=plink_extract,
		heatmap=heatmap,
		drawtree=drawtree,
		fastapath=fastapath,
	)



def to_hapmap(mydf, outf):
	'''
	Generates a Hapmap formatted SNP file from the df SNPs.

	mydf and outf are handed to to_hapmap0 and its result is returned.
	'''
	result=to_hapmap0(mydf, outf)
	return result

def to_ped(mydf, outf):
	'''
	Generates a ped formatted SNP file from the df SNPs (text plink ped file).

	mydf and outf are handed to to_ped0 and its result is returned.
	'''
	result=to_ped0(mydf, outf)
	return result


def do_matrixeqtl(df_snpexp, dflabel,annot='cs10',maxp=1e-5,genes_topn=None,snp_topn=None,topn=None,df_gene=None,recodeplink=False,plink_file=None,exp_file=None,expgeneloc_file=None,recalc=False):
	'''
	Perform Expression quantitative trait loci (eQTL) analysis using Matrix-eQTL https://www.bios.unc.edu/research/genomic_software/Matrix_eQTL
	eQTL analysis links variations in gene expression levels to genotypes. This tool returns the top SNP-gene pairs.

	df_snpexp 		pandas: dataframe with getSetting('TYPE_EXP') and getSetting('TYPE_SNP') datatypes, all values should have the same unit
	dflabel 		string: unique job label
	annot 			string: gene annotation to use; if None, no annotation
	df_gene 		pandas: dataframe with gene annotations generated from api/user/gene
	recodeplink 	bool: if True, regenerates the plink file, recoding using the alleles from df_snpexp
	plink_file 		string: path to binary plink file (binary bed,bim) for large (whole genome) analysis. Plink file available for download in the site
	exp_file 		string: path of the expression matrix file for large (whole transcriptome) data, as downloaded from api/user/expression/list
	expgeneloc_file string: path to a file containing tab-separated geneid, contig, start, end. If not provided but df_gene is provided,
					the df_gene location is used. Otherwise the web-service is queried for all genes in the expression matrix
	recalc 			forwarded unchanged to do_matrixeqtl0 (not documented in this module)

	The top snp-gene pairs that meet the cut-offs are returned:
	maxp 		float: maximum p-value
	genes_topn 	int: return the top pairs with unique N genes sorted by p-value
	snp_topn 	int: return the top pairs with unique N SNPs sorted by p-value
	topn 		int: return the top pairs sorted by p-value

	This returns a tuple with three dataframes for all, cis, and trans pairs (df_all,df_cis,df_trans), each with columns ["snp","expgene","pvalue"]
	Generates three output files [dflabel].eqtl.all.top.tsv,[dflabel].eqtl.cis.top.tsv,[dflabel].eqtl.trans.top.tsv
	'''
	return do_matrixeqtl0(
		df_snpexp,
		dflabel,
		annot,
		maxp=maxp,
		genes_topn=genes_topn,
		snp_topn=snp_topn,
		topn=topn,
		df_gene=df_gene,
		recodeplink=recodeplink,
		plink_file=plink_file,
		exp_file=exp_file,
		expgeneloc_file=expgeneloc_file,
		recalc=recalc,
	)


def merge_matrixeqtl_wgcna_gwas(eqtls=None, wgcnas=None,gwass=None,outfile=None,use_snpgene=True,df_genes=None,recalc=False,requery=False,maxp_wgcna=None,maxp_eqtl=None,maxp_gwas=None):
	'''
	Combine the results from the different pairwise-omics analyses to verify multiple evidence.

	Overview of the process:
	- eQTL gives top snp-expression pairs, and snp gene-expressed gene pairs
	- WGCNA gives top expressed gene-trait pairs
	- mGWAS gives top snp-trait, and snp gene-trait pairs

	The merging connects the expression genes shared by the eQTL and WGCNA results, giving new snp gene-trait pairs.
	The new set of snp gene-trait pairs is then intersected with the mGWAS result.

	The intersecting pairs now have two evidence paths of gene-trait association:

	snp -> eQTL -> expressed genes -> WGCNA -> trait
		-> snp gene

	snp -> assoc -> trait
		-> snp gene

	eqtls, wgcnas, gwass 	list of pandas or string: list of pandas dataframes and/or paths to files returned/generated by
							do_matrixeqtl, do_wgcna_modulesgenes, and do_gwas respectively
	outfile 				string: label for the output files of the merging results
	use_snpgene 			boolean: if True, use the gene_id of the locus where the SNPs are located for merging, else use the SNP ids
	df_genes 				pandas: dataframe of gene annotations from api/user/gene
	recalc 					boolean: recalculate if already cached
	requery 				boolean: requery gene annotations from api/user/gene
	maxp_wgcna, maxp_eqtl, maxp_gwas
							float: further p-value filters before merging, should be lower (more stringent) than was used by the tools

	Returns the result of merge_matrixeqtl_wgcna_gwas0.
	'''
	return merge_matrixeqtl_wgcna_gwas0(
		eqtls=eqtls,
		wgcnas=wgcnas,
		gwass=gwass,
		outfile=outfile,
		use_snpgene=use_snpgene,
		df_genes=df_genes,
		recalc=recalc,
		requery=requery,
		maxp_wgcna=maxp_wgcna,
		maxp_eqtl=maxp_eqtl,
		maxp_gwas=maxp_gwas,
	)


def check_batcheffects(df,dflabel, datatype, batch_id='NCBI BioProject',on_missing='most_frequent',whiten=False,correct_BE=False,be_method='rcombat'):
	'''
	Detect batch effects using Principal Component Analysis, and correct them using ComBat or empiricalBayesLM.

	df 			pandas: dataframe with values and batch label
	dflabel 	string: job id
	datatype 	data to correct, could be TYPE_PHEN or TYPE_EXP
	batch_id 	string: property for the batch identifier. The typical batch identifiers are ['NCBI BioProject', 'phenotype_dataset', 'expression_dataset']
	on_missing 	string: approach to handle missing values [most_frequent,mean,median,KNNImputer,ppca]
	whiten 		boolean: normalize data before PCA
	correct_BE 	boolean: correct the data, will return the tuple (df_corrected_data, dict_corrected_data_bybatch)
	be_method 	string: batch effect correction method ['rcombat' (ComBat from R sva),'ebml' (empiricalBayesLM from R WGCNA)]

	Returns the result of check_batcheffects0.
	'''
	return check_batcheffects0(
		df,
		dflabel,
		datatype,
		batch_id=batch_id,
		on_missing=on_missing,
		whiten=whiten,
		correct_BE=correct_BE,
		be_method=be_method,
	)



def verify_samples(df, datatype1, datatype2=None, common_only=False):
	'''
	Keep only the samples common to datatype1 and datatype2.

	Returns the cleaned (df[datatype1], df[datatype2], df), or df if common_only=False
	(as produced by verify_samples0, to which all arguments are forwarded by keyword).
	'''
	return verify_samples0(
		df=df,
		datatype1=datatype1,
		datatype2=datatype2,
		common_only=common_only,
	)



def do_snpgenes(dfsnp, dfgenes,outfile,snpcolumn='SNP',snpset=None,other_columns=[],requery=False,how='inner'):
	'''
	Public alias for do_snps_getgenes: every argument is forwarded unchanged and its result is returned.

	NOTE(review): presumably attaches the genes in dfgenes to the SNPs listed in column snpcolumn of dfsnp
	and writes the result under outfile - confirm against do_snps_getgenes.
	'''
	return do_snps_getgenes(
		dfsnp,
		dfgenes,
		outfile,
		snpcolumn=snpcolumn,
		snpset=snpset,
		other_columns=other_columns,
		requery=requery,
		how=how,
	)


'''
Utility tools for user interface
'''

def _resolve_selection(tokens,n2value,options):
	# Translate selection tokens (numbers, or 'A'/'a' for everything) into a sorted list of choices.
	picked=set()
	for token in tokens:
		key=str(token).strip()
		if not key:
			# blank entry, e.g. empty input or a trailing comma
			continue
		if key in ('A','a'):
			# 'all' overrides any other entry
			return list(options)
		picked.add(n2value[key])
	return sorted(picked)


def select_options(choices,message1='Select numbers for choices, separate with comma for multiple choices',message2='',waitInput=True,selected=None):
	'''
	Prompt a selection from number-coded 'choices'.
	The user enters a list of comma separated numbers for the selection.
	The function returns the list of selected choices.

	choices 	iterable of strings: the options; they are de-duplicated and numbered from 1 in sorted order
	message1 	string: instruction shown before the options (interactive mode only)
	message2 	string: optional extra message shown before message1 (interactive mode only)
	waitInput 	bool: if True, display the options four per line and read the selection with input()
	selected 	iterable: used only when waitInput is False; pre-selected option numbers (int or string)

	In both modes the entry 'A' or 'a' selects all options. Blank entries are ignored.

	Returns the sorted list of selected choices, or None when waitInput is False and
	nothing is pre-selected.
	Raises KeyError for an entry that is neither a displayed number nor 'A'/'a'.
	'''
	options=sorted(set(choices))
	n2value={str(num): option for num,option in enumerate(options,1)}

	line=""
	if waitInput:
		line+='[A] all\t\t'
		if message2:
			printdisplay(MESSAGE_INFO,message2)
		printdisplay(MESSAGE_INFO,message1)
	choicecol=1
	for icount,ichoice in enumerate(options,1):
		if choicecol==4:
			# fourth entry completes the row; completed rows are shown only in interactive mode
			line+='['+str(icount) + '] ' + ichoice
			if waitInput:
				printdisplay(MESSAGE_INFO,line)
			line=''
			choicecol=0
		else:
			line+='[' + str(icount) + '] ' + ichoice + "\t\t"
		choicecol+=1
	if line:
		# the last, incomplete row is shown in both modes
		printdisplay(MESSAGE_INFO,line.strip())

	if waitInput:
		retsel=_resolve_selection(input().split(","),n2value,options)
		printdisplay(MESSAGE_INFO,'selected ' + str(retsel))
		return retsel
	if selected:
		retsel=_resolve_selection(selected,n2value,options)
		printdisplay(MESSAGE_INFO,'pre-selected ' + str(retsel))
		return retsel

	return None


# Wrapper for displaying tabular data
def display_table(data,columns=None):
	'''
	Build a pandas DataFrame from data, using columns as the column names,
	and show it through printdisplay at MESSAGE_INFO level.
	'''
	table=pd.DataFrame(data,columns=columns)
	try:
		# NOTE(review): the Styler returned by this call is discarded, so it likely has no visible effect - confirm the intent
		table.style.hide()
	except Exception as ex:
		# best effort: any failure while styling is ignored
		pass
	printdisplay(MESSAGE_INFO,table)


def tgzdownloads(outfile, txt=True, img=True):
	'''
	Tar files for download.

	outfile 	string: archive name without extension; the archive is written to outfile + '.tgz'
	txt 		bool: include text outputs (.txt, .tsv, .csv)
	img 		bool: include image outputs (.png, .pdf)

	The names in downloadfiles that match the requested suffixes are written to 'dwlist.txt'
	(removed first with osrm, then rewritten), and tar is run on that list.
	'''
	# '.csv' takes the place of a duplicated '.tsv' test that was here before
	txt_suffixes=('.txt','.tsv','.csv')
	img_suffixes=('.png','.pdf')
	osrm('dwlist.txt')
	with open('dwlist.txt','wt') as fout:
		for f in downloadfiles:
			if (txt and f.endswith(txt_suffixes)) or (img and f.endswith(img_suffixes)):
				fout.write(f + "\n")
	# NOTE(review): outfile is concatenated into the command string unquoted - only pass trusted names
	ossystem("tar -cvzf " + outfile + ".tgz -T dwlist.txt")
	printdisplay(MESSAGE_INFO,"generated " + outfile + ".tgz")



def _append_remapped_group_row(df_hasgroup, group, convert):
	# Append a '<group>_map' row holding row `group` translated through `convert`; returns (new frame, new row name).
	mapped=group + '_map'
	# work on a copy so the caller's dict is never modified
	mapping=dict(convert)
	# the 'property' cell of the row holds the group name itself; map it to the new row name
	mapping[group]=mapped
	df_remap=df_hasgroup.loc[group,:].to_frame()
	# values without an entry in the mapping keep their original value
	df_remap[mapped]=df_remap[group].map(mapping).fillna(df_remap[group])
	df_remap=df_remap[mapped].to_frame().T
	return pd.concat([df_hasgroup, df_remap]), mapped


def chisquare_properties(df_snp,groupX='cultivar_group',groupY='NCBI BioProject',convertX=dict(),convertY=dict()):
	'''
	Compute chi-square statistics between properties groupX and groupY.

	df_snp 				pandas dataframe: needs 'datatype' and 'property' columns (every other column is treated as a sample)
						and rows whose 'property' is groupX and groupY
	groupX, groupY 		string: properties (categorical)
	convertX, convertY 	dict: convert original values; when non-empty, only the samples whose value for that
						property is one of the dict keys are kept, and values are translated through the dict.
						The dicts are not modified.

	Returns chisquare_contingency0(Y, X), where Y maps each groupY value to its set of samples
	and X maps each groupX value to its set of samples.
	'''
	df_hasgroup=df_snp.set_index('property',drop=False)
	if convertX:
		df_hasgroup=df_hasgroup.loc[:, df_hasgroup.loc[groupX].isin([TYPE_PROP,groupX] + list(convertX.keys()))]
	if convertY:
		df_hasgroup=df_hasgroup.loc[:, df_hasgroup.loc[groupY].isin([TYPE_PROP,groupY] + list(convertY.keys()))]

	groupXmap=groupX
	groupYmap=groupY
	if convertX:
		df_hasgroup,groupXmap=_append_remapped_group_row(df_hasgroup,groupX,convertX)
	if convertY:
		df_hasgroup,groupYmap=_append_remapped_group_row(df_hasgroup,groupY,convertY)

	df_hasgroup=df_hasgroup.reset_index(drop=True)

	samples=list(df_hasgroup.columns.values)
	samples.remove('datatype')
	samples.remove('property')

	# select the two property rows once rather than once per sample
	rowX=df_hasgroup.loc[df_hasgroup['property']==groupXmap]
	rowY=df_hasgroup.loc[df_hasgroup['property']==groupYmap]
	mapProj2samples=dict()
	mapRegroup2samples=dict()
	for sample in samples:
		mapRegroup2samples.setdefault(rowX[sample].values[0],set()).add(sample)
		mapProj2samples.setdefault(rowY[sample].values[0],set()).add(sample)

	return chisquare_contingency0(mapProj2samples,mapRegroup2samples)


def chisquare_contingency(X,Y):
	'''
	Public alias for chisquare_contingency0: X and Y are forwarded unchanged and its result is returned.
	'''
	result=chisquare_contingency0(X, Y)
	return result