
# convert abundance matrix into bigwig for display in a JBrowse track using multibigwig https://github.com/elsiklab/multibigwig
# for each sample:   chr start end value
#
#  Requirements: bedtools
#  bedGraphToBigWig https://www.encodeproject.org/software/bedgraphtobigwig/
#  
# input files
# gff: gff file with exon and mRNA
# inf: abundance matrix of mRNA x sample
# chrlen: contig length file
#

import json
import os
import subprocess
import sys

# Input presets, one per genome assembly.
# Each entry is (gff, inf, renamegene, outdir, chrlen).
_assembly_inputs = {
	'cs10': (
		'21trich_rna/cs10.transcripts.transdecoder.genome.gff3.clustered.gff.4tripal.gff',
		'abundance_cs10_rnab.isoform.TMM.EXPR.renamed.matrix',
		"",
		'cs10_bw/',
		'cs10refseq.fna.seqlen.txt',
	),
	'pkv5': (
		'21trich_rna/pkv5.transcripts.transdecoder.genome.gff3.clustered.gff.4tripal.gff',
		'abundance_pkv5_rnab.isoform.TMM.EXPR.matrix',
		"PKV5R21T",
		'pkv5_bw/',
		'pkv5.fna.seqlen.txt',
	),
	'fnv2': (
		'21trich_rna/fnv2.transcripts.transdecoder.genome.gff3.clustered.gff.4tripal.gff',
		'abundance_fnv2_rnab.isoform.TMM.EXPR.matrix',
		"FNV2R21T",
		'fnv2_bw/',
		'fnv2.fna.seqlen.txt',
	),
}

# Change this key to process a different assembly.
_active_assembly = 'fnv2'
gff, inf, renamegene, outdir, chrlen = _assembly_inputs[_active_assembly]

# Substring of the mRNA names in the matrix file that is replaced with
# renamegene (no replacement happens when renamegene is empty).
origname = 'RNAB'
# Longest mRNA to include: spans of this many bases or more are left out.
maxlengene = 50000



# Identifier -> display name, filled from biomaterials_names.tsv: the values
# in columns 2 and 7 of each row both map to the value in column 1.
srr2name=dict()

if True:  # sample name mappings to use if display is different from names in matrix file
	with open('biomaterials_names.tsv', mode="r", encoding="utf-8") as fin:
		for rawline in fin:
			line=rawline.strip()
			if not line:
				# A blank row is skipped; it must not end the read early and
				# drop every mapping that follows it.
				continue
			cols=line.split("\t")
			try:
				srr2name[cols[1].strip()]=cols[0].strip()
				srr2name[cols[6].strip()]=cols[0].strip()
			except IndexError:
				# Show the offending row (fewer than 7 columns) before failing.
				print(line)
				raise

# gene2exon: parent ID -> list of "chr<TAB>start-1<TAB>end" strings, one per exon row
# gene2pos:  mRNA ID   -> "chr<TAB>start-1<TAB>end" for the whole mRNA row
gene2exon=dict()
gene2pos=dict()
with open(gff) as gff_handle:
	for record in gff_handle:
		fields=record.strip().split("\t")
		# Rows without a 9th (attribute) column are ignored.
		if len(fields)<=8:
			continue
		feature=fields[2]
		if feature not in ('mRNA','exon'):
			continue
		# Interval text as written to the bed files: start is the GFF start minus one.
		interval="\t".join((fields[0], str(int(fields[3])-1), fields[4]))
		# Only the first ';'-separated attribute is used as the identifier.
		first_attr=fields[8].split(";")[0]
		if feature=='mRNA':
			mrna_id=first_attr.replace("ID=","")
			span=abs(int(fields[4])-int(fields[3]))
			# Keep the whole-mRNA interval only for spans below maxlengene.
			if span<maxlengene:
				gene2pos[mrna_id]=interval
		else:
			parent_id=first_attr.replace("Parent=","")
			#print('parent=' + parent_id)
			gene2exon.setdefault(parent_id,[]).append(interval)

print('genes= ' + str(len(gene2pos)))
print('gene2exon= ' + str(len(gene2exon)))
print(list(gene2exon.keys())[0:10])


# Read the abundance matrix and line its rows up with genomic intervals.
#   positions     one "chr<TAB>start<TAB>end" string per emitted interval
#   sample2value  sample name -> list of values, parallel to positions
# A row whose ID has exons in gene2exon is emitted once per exon; otherwise
# the whole-mRNA interval from gene2pos is used; otherwise the row is dropped.
missing=0    # rows whose ID is in neither gene2exon nor gene2pos
hasgene=0    # rows placed with the whole-mRNA interval
exonscnt=0   # exon intervals emitted
with open(inf) as fin:
	# Header row: everything after the first column is a sample name.
	header=fin.readline().rstrip()
	samples=header.split("\t")[1:]
	col2sample=dict(enumerate(samples, 1))
	sample2value=dict((sample, []) for sample in samples)
	positions=[]
	#print('samples=' + str(len(sample2value)))
	print(list(sample2value.keys()))
	for rawline in fin:
		line=rawline.rstrip()
		if not line:
			# A blank row is skipped; it must not end the read early and
			# silently drop the rest of the matrix.
			continue
		cols=line.split("\t")
		if renamegene:
			cols[0]=cols[0].replace(origname,renamegene)

		#print('cols[0]=' + cols[0])
		if cols[0] in gene2exon:
			intervals=gene2exon[cols[0]]
			exonscnt+=len(intervals)
		elif cols[0] in gene2pos:
			intervals=[gene2pos[cols[0]]]
			hasgene+=1
		else:
			missing+=1
			continue

		values=cols[1:]
		if len(values)!=len(samples):
			# A ragged row would leave the per-sample lists out of step with
			# positions, so fail here with the row identified.
			raise ValueError('row ' + cols[0] + ' has ' + str(len(values)) +
				' values but the header names ' + str(len(samples)) + ' samples')
		for interval in intervals:
			positions.append(interval)
			for sample, value in zip(samples, values):
				sample2value[sample].append(value)

#print('genes=' + str(len(positions)))
print('exons=' + str(len(positions)))
print('missing=' + str(missing))
print('hasgene=' + str(hasgene))
print('exonscnt=' + str(exonscnt))

def _run_step(args, stdout_path=None):
	"""Run one external command and report whether it exited with status 0.

	args is an argument list handed straight to the program (no shell), so
	file names containing spaces or shell metacharacters are passed intact.
	When stdout_path is given, the command's standard output is written to
	that file. Failures are reported on stderr instead of being ignored.
	"""
	shown=' '.join(args)
	if stdout_path is not None:
		shown+=' > ' + stdout_path
	print(shown)
	# LC_ALL=C makes `sort` order chromosome names by byte value.
	# NOTE(review): bedGraphToBigWig is understood to expect that ordering - confirm against the UCSC docs.
	env=dict(os.environ, LC_ALL='C')
	try:
		if stdout_path is None:
			status=subprocess.call(args, env=env)
		else:
			with open(stdout_path,'w') as sink:
				status=subprocess.call(args, stdout=sink, env=env)
	except OSError as err:
		print('cannot run ' + args[0] + ': ' + str(err), file=sys.stderr)
		return False
	if status!=0:
		print(args[0] + ' exited with status ' + str(status), file=sys.stderr)
	return status==0


# Write one bedGraph per sample, sort it, merge overlapping intervals keeping
# the maximum value, convert the result to bigWig, and finally print the
# comma-separated {"url":...,"name":...} entries for the tracks that were built.
if outdir:
	# The output directory must exist before the bed files can be opened.
	os.makedirs(outdir, exist_ok=True)

url=[]
for rawname, values in sample2value.items():
	if len(values)!=len(positions):
		# Exit with a non-zero status so callers can see the run failed.
		sys.exit('len(values)!=len(positions)  ' + str(len(values)) + ', ' + str(len(positions)))

	# Look the values up under the raw header name (done above) and strip
	# only for the display-name lookup.
	sample=rawname.strip()
	label=srr2name.get(sample, sample)

	bedfile=outdir + label + '.bed'
	sortedbed=bedfile + '.sorted.bed'
	mergedbed=bedfile + '.merged.bed'
	bigwig=outdir + label + '.bw'

	with open(bedfile,'w') as fout:
		for pos, value in zip(positions, values):
			fout.write(pos + "\t" + value + "\n")

	# Each step consumes the previous step's file, so stop at the first failure.
	built=(
		_run_step(['sort', '-k1,1', '-k2,2n', bedfile], sortedbed)
		and _run_step(['bedtools', 'merge', '-c', '4', '-o', 'max', '-i', sortedbed], mergedbed)
		and _run_step(['./bedGraphToBigWig', mergedbed, chrlen, bigwig])
	)
	if not built:
		print('skipping ' + label + ': bigWig was not created', file=sys.stderr)
		continue

	# json.dumps escapes quotes/backslashes in names; separators keep the
	# compact {"url":"...","name":"..."} layout.
	url.append(json.dumps({"url": bigwig, "name": label}, separators=(",", ":"), ensure_ascii=False))


print(", ".join(url))

#cs10
#genes=35148
#missing=15142
#{"url":"cs10_bw/lemon_skunk.1.bw","name":"lemon_skunk.1"}, {"url":"cs10_bw/lemon_skunk.2.bw","name":"lemon_skunk.2"}, {"url":"cs10_bw/lemon_skunk.3.bw","name":"lemon_skunk.3"}, {"url":"cs10_bw/chocolope.1.bw","name":"chocolope.1"}, {"url":"cs10_bw/chocolope.2.bw","name":"chocolope.2"}, {"url":"cs10_bw/chocolope.3.bw","name":"chocolope.3"}, {"url":"cs10_bw/afghan_kush.1.bw","name":"afghan_kush.1"}, {"url":"cs10_bw/afghan_kush.2.bw","name":"afghan_kush.2"}, {"url":"cs10_bw/afghan_kush.3.bw","name":"afghan_kush.3"}, {"url":"cs10_bw/cbd_skunk_haze.1.bw","name":"cbd_skunk_haze.1"}, {"url":"cs10_bw/cbd_skunk_haze.2.bw","name":"cbd_skunk_haze.2"}, {"url":"cs10_bw/cbd_skunk_haze.3.bw","name":"cbd_skunk_haze.3"}, {"url":"cs10_bw/blue_cheese.1.bw","name":"blue_cheese.1"}, {"url":"cs10_bw/blue_cheese.2.bw","name":"blue_cheese.2"}, {"url":"cs10_bw/blue_cheese.3.bw","name":"blue_cheese.3"}, {"url":"cs10_bw/cannbio2-stage4.4.bw","name":"cannbio2-stage4.4"}, {"url":"cs10_bw/cannbio2-stage4.2.bw","name":"cannbio2-stage4.2"}, {"url":"cs10_bw/cannbio2-stage4.1.bw","name":"cannbio2-stage4.1"}, {"url":"cs10_bw/cannbio2-stage3.6.bw","name":"cannbio2-stage3.6"}, {"url":"cs10_bw/cannbio2-stage3.3.bw","name":"cannbio2-stage3.3"}, {"url":"cs10_bw/cannbio2-stage3.2.bw","name":"cannbio2-stage3.2"}, {"url":"cs10_bw/cannbio2-stage2.5.bw","name":"cannbio2-stage2.5"}, {"url":"cs10_bw/cannbio2-stage2.3.bw","name":"cannbio2-stage2.3"}, {"url":"cs10_bw/cannbio2-stage2.1.bw","name":"cannbio2-stage2.1"}, {"url":"cs10_bw/cannbio2-stage1.6.bw","name":"cannbio2-stage1.6"}, {"url":"cs10_bw/cannbio2-stage1.5.bw","name":"cannbio2-stage1.5"}, {"url":"cs10_bw/cannbio2-stage1.3.bw","name":"cannbio2-stage1.3"}, {"url":"cs10_bw/finola_stalked.3.bw","name":"finola_stalked.3"}, {"url":"cs10_bw/finola_bulbous.3.bw","name":"finola_bulbous.3"}, {"url":"cs10_bw/finola_prestalk.1.bw","name":"finola_prestalk.1"}, {"url":"cs10_bw/finola_bulbous.1.bw","name":"finola_bulbous.1"}, 
#{"url":"cs10_bw/finola_bulbous.2.bw","name":"finola_bulbous.2"}, {"url":"cs10_bw/finola_stalked.1.bw","name":"finola_stalked.1"}, {"url":"cs10_bw/finola_stalked.2.bw","name":"finola_stalked.2"}, {"url":"cs10_bw/finola_prestalk.2.bw","name":"finola_prestalk.2"}, {"url":"cs10_bw/finola_prestalk.3.bw","name":"finola_prestalk.3"}, {"url":"cs10_bw/sour_diesel.1.bw","name":"sour_diesel.1"}, {"url":"cs10_bw/sour_diesel.2.bw","name":"sour_diesel.2"}, {"url":"cs10_bw/sour_diesel.3.bw","name":"sour_diesel.3"}, {"url":"cs10_bw/canna_tsu.1.bw","name":"canna_tsu.1"}, {"url":"cs10_bw/canna_tsu.2.bw","name":"canna_tsu.2"}, {"url":"cs10_bw/canna_tsu.3.bw","name":"canna_tsu.3"}, {"url":"cs10_bw/black_lime.1.bw","name":"black_lime.1"}, {"url":"cs10_bw/black_lime.2.bw","name":"black_lime.2"}, {"url":"cs10_bw/black_lime.3.bw","name":"black_lime.3"}, {"url":"cs10_bw/valley_fire.1.bw","name":"valley_fire.1"}, {"url":"cs10_bw/cherry_chem.1.bw","name":"cherry_chem.1"}, {"url":"cs10_bw/terple.3.bw","name":"terple.3"}, {"url":"cs10_bw/cherry_chem.3.bw","name":"cherry_chem.3"}, {"url":"cs10_bw/cherry_chem.2.bw","name":"cherry_chem.2"}, {"url":"cs10_bw/black_berry_kush.2.bw","name":"black_berry_kush.2"}, {"url":"cs10_bw/black_berry_kush.1.bw","name":"black_berry_kush.1"}, {"url":"cs10_bw/black_berry_kush.3.bw","name":"black_berry_kush.3"}, {"url":"cs10_bw/white_cookies.3.bw","name":"white_cookies.3"}, {"url":"cs10_bw/mama_thai.1.bw","name":"mama_thai.1"}, {"url":"cs10_bw/mama_thai.2.bw","name":"mama_thai.2"}, {"url":"cs10_bw/mama_thai.3.bw","name":"mama_thai.3"}, {"url":"cs10_bw/valley_fire.2.bw","name":"valley_fire.2"}, {"url":"cs10_bw/valley_fire.3.bw","name":"valley_fire.3"}, {"url":"cs10_bw/white_cookies.1.bw","name":"white_cookies.1"}, {"url":"cs10_bw/white_cookies.2.bw","name":"white_cookies.2"}, {"url":"cs10_bw/terple.1.bw","name":"terple.1"}, {"url":"cs10_bw/terple.2.bw","name":"terple.2"}

# pkv5
#genes=36387
#missing=14953
#{"url":"pkv5_bw/lemon_skunk.1.bw","name":"lemon_skunk.1"}, {"url":"pkv5_bw/lemon_skunk.2.bw","name":"lemon_skunk.2"}, {"url":"pkv5_bw/lemon_skunk.3.bw","name":"lemon_skunk.3"}, {"url":"pkv5_bw/chocolope.1.bw","name":"chocolope.1"}, {"url":"pkv5_bw/chocolope.2.bw","name":"chocolope.2"}, {"url":"pkv5_bw/chocolope.3.bw","name":"chocolope.3"}, {"url":"pkv5_bw/afghan_kush.1.bw","name":"afghan_kush.1"}, {"url":"pkv5_bw/afghan_kush.2.bw","name":"afghan_kush.2"}, {"url":"pkv5_bw/afghan_kush.3.bw","name":"afghan_kush.3"}, {"url":"pkv5_bw/cbd_skunk_haze.1.bw","name":"cbd_skunk_haze.1"}, {"url":"pkv5_bw/cbd_skunk_haze.2.bw","name":"cbd_skunk_haze.2"}, {"url":"pkv5_bw/cbd_skunk_haze.3.bw","name":"cbd_skunk_haze.3"}, {"url":"pkv5_bw/blue_cheese.1.bw","name":"blue_cheese.1"}, {"url":"pkv5_bw/blue_cheese.2.bw","name":"blue_cheese.2"}, {"url":"pkv5_bw/blue_cheese.3.bw","name":"blue_cheese.3"}, {"url":"pkv5_bw/cannbio2-stage4.4.bw","name":"cannbio2-stage4.4"}, {"url":"pkv5_bw/cannbio2-stage4.2.bw","name":"cannbio2-stage4.2"}, {"url":"pkv5_bw/cannbio2-stage4.1.bw","name":"cannbio2-stage4.1"}, {"url":"pkv5_bw/cannbio2-stage3.6.bw","name":"cannbio2-stage3.6"}, {"url":"pkv5_bw/cannbio2-stage3.3.bw","name":"cannbio2-stage3.3"}, {"url":"pkv5_bw/cannbio2-stage3.2.bw","name":"cannbio2-stage3.2"}, {"url":"pkv5_bw/cannbio2-stage2.5.bw","name":"cannbio2-stage2.5"}, {"url":"pkv5_bw/cannbio2-stage2.3.bw","name":"cannbio2-stage2.3"}, {"url":"pkv5_bw/cannbio2-stage2.1.bw","name":"cannbio2-stage2.1"}, {"url":"pkv5_bw/cannbio2-stage1.6.bw","name":"cannbio2-stage1.6"}, {"url":"pkv5_bw/cannbio2-stage1.5.bw","name":"cannbio2-stage1.5"}, {"url":"pkv5_bw/cannbio2-stage1.3.bw","name":"cannbio2-stage1.3"}, {"url":"pkv5_bw/finola_stalked.3.bw","name":"finola_stalked.3"}, {"url":"pkv5_bw/finola_bulbous.3.bw","name":"finola_bulbous.3"}, {"url":"pkv5_bw/finola_prestalk.1.bw","name":"finola_prestalk.1"}, {"url":"pkv5_bw/finola_bulbous.1.bw","name":"finola_bulbous.1"}, 
#{"url":"pkv5_bw/finola_bulbous.2.bw","name":"finola_bulbous.2"}, {"url":"pkv5_bw/finola_stalked.1.bw","name":"finola_stalked.1"}, {"url":"pkv5_bw/finola_stalked.2.bw","name":"finola_stalked.2"}, {"url":"pkv5_bw/finola_prestalk.2.bw","name":"finola_prestalk.2"}, {"url":"pkv5_bw/finola_prestalk.3.bw","name":"finola_prestalk.3"}, {"url":"pkv5_bw/sour_diesel.1.bw","name":"sour_diesel.1"}, {"url":"pkv5_bw/sour_diesel.2.bw","name":"sour_diesel.2"}, {"url":"pkv5_bw/sour_diesel.3.bw","name":"sour_diesel.3"}, {"url":"pkv5_bw/canna_tsu.1.bw","name":"canna_tsu.1"}, {"url":"pkv5_bw/canna_tsu.2.bw","name":"canna_tsu.2"}, {"url":"pkv5_bw/canna_tsu.3.bw","name":"canna_tsu.3"}, {"url":"pkv5_bw/black_lime.1.bw","name":"black_lime.1"}, {"url":"pkv5_bw/black_lime.2.bw","name":"black_lime.2"}, {"url":"pkv5_bw/black_lime.3.bw","name":"black_lime.3"}, {"url":"pkv5_bw/valley_fire.1.bw","name":"valley_fire.1"}, {"url":"pkv5_bw/cherry_chem.1.bw","name":"cherry_chem.1"}, {"url":"pkv5_bw/terple.3.bw","name":"terple.3"}, {"url":"pkv5_bw/cherry_chem.3.bw","name":"cherry_chem.3"}, {"url":"pkv5_bw/cherry_chem.2.bw","name":"cherry_chem.2"}, {"url":"pkv5_bw/black_berry_kush.2.bw","name":"black_berry_kush.2"}, {"url":"pkv5_bw/black_berry_kush.1.bw","name":"black_berry_kush.1"}, {"url":"pkv5_bw/black_berry_kush.3.bw","name":"black_berry_kush.3"}, {"url":"pkv5_bw/white_cookies.3.bw","name":"white_cookies.3"}, {"url":"pkv5_bw/mama_thai.1.bw","name":"mama_thai.1"}, {"url":"pkv5_bw/mama_thai.2.bw","name":"mama_thai.2"}, {"url":"pkv5_bw/mama_thai.3.bw","name":"mama_thai.3"}, {"url":"pkv5_bw/valley_fire.2.bw","name":"valley_fire.2"}, {"url":"pkv5_bw/valley_fire.3.bw","name":"valley_fire.3"}, {"url":"pkv5_bw/white_cookies.1.bw","name":"white_cookies.1"}, {"url":"pkv5_bw/white_cookies.2.bw","name":"white_cookies.2"}, {"url":"pkv5_bw/terple.1.bw","name":"terple.1"}, {"url":"pkv5_bw/terple.2.bw","name":"terple.2"}

# fnv2
#genes=37341
#missing=14089
#{"url":"fnv2_bw/lemon_skunk.1.bw","name":"lemon_skunk.1"}, {"url":"fnv2_bw/lemon_skunk.2.bw","name":"lemon_skunk.2"}, {"url":"fnv2_bw/lemon_skunk.3.bw","name":"lemon_skunk.3"}, {"url":"fnv2_bw/chocolope.1.bw","name":"chocolope.1"}, {"url":"fnv2_bw/chocolope.2.bw","name":"chocolope.2"}, {"url":"fnv2_bw/chocolope.3.bw","name":"chocolope.3"}, {"url":"fnv2_bw/afghan_kush.1.bw","name":"afghan_kush.1"}, {"url":"fnv2_bw/afghan_kush.2.bw","name":"afghan_kush.2"}, {"url":"fnv2_bw/afghan_kush.3.bw","name":"afghan_kush.3"}, {"url":"fnv2_bw/cbd_skunk_haze.1.bw","name":"cbd_skunk_haze.1"}, {"url":"fnv2_bw/cbd_skunk_haze.2.bw","name":"cbd_skunk_haze.2"}, {"url":"fnv2_bw/cbd_skunk_haze.3.bw","name":"cbd_skunk_haze.3"}, {"url":"fnv2_bw/blue_cheese.1.bw","name":"blue_cheese.1"}, {"url":"fnv2_bw/blue_cheese.2.bw","name":"blue_cheese.2"}, {"url":"fnv2_bw/blue_cheese.3.bw","name":"blue_cheese.3"}, {"url":"fnv2_bw/cannbio2-stage4.4.bw","name":"cannbio2-stage4.4"}, {"url":"fnv2_bw/cannbio2-stage4.2.bw","name":"cannbio2-stage4.2"}, {"url":"fnv2_bw/cannbio2-stage4.1.bw","name":"cannbio2-stage4.1"}, {"url":"fnv2_bw/cannbio2-stage3.6.bw","name":"cannbio2-stage3.6"}, {"url":"fnv2_bw/cannbio2-stage3.3.bw","name":"cannbio2-stage3.3"}, {"url":"fnv2_bw/cannbio2-stage3.2.bw","name":"cannbio2-stage3.2"}, {"url":"fnv2_bw/cannbio2-stage2.5.bw","name":"cannbio2-stage2.5"}, {"url":"fnv2_bw/cannbio2-stage2.3.bw","name":"cannbio2-stage2.3"}, {"url":"fnv2_bw/cannbio2-stage2.1.bw","name":"cannbio2-stage2.1"}, {"url":"fnv2_bw/cannbio2-stage1.6.bw","name":"cannbio2-stage1.6"}, {"url":"fnv2_bw/cannbio2-stage1.5.bw","name":"cannbio2-stage1.5"}, {"url":"fnv2_bw/cannbio2-stage1.3.bw","name":"cannbio2-stage1.3"}, {"url":"fnv2_bw/finola_stalked.3.bw","name":"finola_stalked.3"}, {"url":"fnv2_bw/finola_bulbous.3.bw","name":"finola_bulbous.3"}, {"url":"fnv2_bw/finola_prestalk.1.bw","name":"finola_prestalk.1"}, {"url":"fnv2_bw/finola_bulbous.1.bw","name":"finola_bulbous.1"}, 
#{"url":"fnv2_bw/finola_bulbous.2.bw","name":"finola_bulbous.2"}, {"url":"fnv2_bw/finola_stalked.1.bw","name":"finola_stalked.1"}, {"url":"fnv2_bw/finola_stalked.2.bw","name":"finola_stalked.2"}, {"url":"fnv2_bw/finola_prestalk.2.bw","name":"finola_prestalk.2"}, {"url":"fnv2_bw/finola_prestalk.3.bw","name":"finola_prestalk.3"}, {"url":"fnv2_bw/sour_diesel.1.bw","name":"sour_diesel.1"}, {"url":"fnv2_bw/sour_diesel.2.bw","name":"sour_diesel.2"}, {"url":"fnv2_bw/sour_diesel.3.bw","name":"sour_diesel.3"}, {"url":"fnv2_bw/canna_tsu.1.bw","name":"canna_tsu.1"}, {"url":"fnv2_bw/canna_tsu.2.bw","name":"canna_tsu.2"}, {"url":"fnv2_bw/canna_tsu.3.bw","name":"canna_tsu.3"}, {"url":"fnv2_bw/black_lime.1.bw","name":"black_lime.1"}, {"url":"fnv2_bw/black_lime.2.bw","name":"black_lime.2"}, {"url":"fnv2_bw/black_lime.3.bw","name":"black_lime.3"}, {"url":"fnv2_bw/valley_fire.1.bw","name":"valley_fire.1"}, {"url":"fnv2_bw/cherry_chem.1.bw","name":"cherry_chem.1"}, {"url":"fnv2_bw/terple.3.bw","name":"terple.3"}, {"url":"fnv2_bw/cherry_chem.3.bw","name":"cherry_chem.3"}, {"url":"fnv2_bw/cherry_chem.2.bw","name":"cherry_chem.2"}, {"url":"fnv2_bw/black_berry_kush.2.bw","name":"black_berry_kush.2"}, {"url":"fnv2_bw/black_berry_kush.1.bw","name":"black_berry_kush.1"}, {"url":"fnv2_bw/black_berry_kush.3.bw","name":"black_berry_kush.3"}, {"url":"fnv2_bw/white_cookies.3.bw","name":"white_cookies.3"}, {"url":"fnv2_bw/mama_thai.1.bw","name":"mama_thai.1"}, {"url":"fnv2_bw/mama_thai.2.bw","name":"mama_thai.2"}, {"url":"fnv2_bw/mama_thai.3.bw","name":"mama_thai.3"}, {"url":"fnv2_bw/valley_fire.2.bw","name":"valley_fire.2"}, {"url":"fnv2_bw/valley_fire.3.bw","name":"valley_fire.3"}, {"url":"fnv2_bw/white_cookies.1.bw","name":"white_cookies.1"}, {"url":"fnv2_bw/white_cookies.2.bw","name":"white_cookies.2"}, {"url":"fnv2_bw/terple.1.bw","name":"terple.1"}, {"url":"fnv2_bw/terple.2.bw","name":"terple.2"}
