view qtlmap_15/qtlmap7_15nodes.py @ 0:fa0693cf076b draft

Uploaded
author nanettec
date Fri, 18 Mar 2016 05:51:39 -0400
parents
children
line wrap: on
line source

"""
@summary: MAP and PARSE
@version 7

"""
# MAPPER  --> save only QTL output

# Input: qtlcart.inp
#        qtlcart.map
#      	 parameters.txt

# Output: QTLs_total.txt

import optparse, sys
import tempfile
import re

def stop_err( msg ):
	sys.stderr.write( "%s\n" % msg )
	sys.exit()
    
def __main__():
	parser = optparse.OptionParser()
	parser.add_option("-i", "--input1", default=None, dest="input1", 
			  help="qtlcart.inp file")
	parser.add_option("-j", "--input2", default=None, dest="input2", 
			  help="qtlcart.map file")
	parser.add_option("-k", "--input3", default=None, dest="input3", 
			  help="parameters.txt file")
    
	parser.add_option("-o", "--output1", default=None, dest="output1", 
			  help="QTLs_total.txt file")
	(options, args) = parser.parse_args()
	
	try:
	    open(options.input1, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the qtlcart.inp file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the qtlcart.inp file:\n" + str(e))
    
	try:
	    open(options.input2, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the qtlcart.map file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the qtlcart.map file:\n" + str(e))

	try:
	    open(options.input3, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the parameters.txt file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the parameters.txt file:\n" + str(e))
	
	######################################################################
	# submit.py
	######################################################################
	
	import subprocess
	import os
	
	# Create temp direcotry
	tempdir = tempfile.mkdtemp()
	#print tempdir
	
	s = "cp %s %s/parameters.txt" %(options.input3, tempdir)
	subprocess.call(s, shell=True)
	paramters_file = open(tempdir+"/parameters.txt", "r")
	parvalues = []
	for line in paramters_file:
		l = line.strip().split("\t")
		if l[0] == "SRmodel":
			SRmodel = l[1]
		if l[0] == "Zmodel":
			Zmodel = l[1]
		if l[0] == "threshold":
			threshold = l[1]
		if l[0] == "walking_speed":
			walking_speed = l[1]
		if l[0] == "window_size":
			window_size = l[1]
		if l[0] == "minimum_cM_between_QTL":
			minimum_cM_between_QTL = l[1]
	paramters_file.close()		

	# copy INPUT file to the temp directory
	s = "cp %s %s/qtlcart.inp" %(options.input1, tempdir)
	subprocess.call(s, shell=True)
	s = "cp %s %s/qtlcart.map" %(options.input2, tempdir)
	subprocess.call(s, shell=True)
	
        f=open(tempdir+"/qtlcart.inp", "r")
	ln=f.readlines()
        f.close()
        
        if ln[0] == "NA":
		f1=open(tempdir+"/QTLs_total_parsed_LOD.txt", "w")
		f1.write("")
		f1.close()
		os.system("mv %s/QTLs_total_parsed_LOD.txt %s" %(tempdir,options.output1))
		#os.system("mv %s/qtlcart.rc %s" %(tempdir,options.output2))
        else:
		instruction = "/cluster1/bin/Rcross -i %s/qtlcart.inp -o %s/qtlcart.cro -A -V" %(tempdir, tempdir)
		os.system(instruction)
		
		instruction = "/cluster1/bin/SRmapqtl -i %s/qtlcart.cro -e %s/SRqtlcart.log -o %s/qtlcart.sr -m %s/qtlcart.map -M %s -t 9999999999 -A -V" %(tempdir, tempdir, tempdir, tempdir, str(SRmodel))
		os.system(instruction)        
		
		instruction = "/cluster1/bin/Zmapqtl -i %s/qtlcart.cro -o %s/qtlcart.z -m %s/qtlcart.map -S %s/qtlcart.sr -M %s -d %s -w %s -t 9999999999 -A -V" %(tempdir, tempdir, tempdir, tempdir, str(Zmodel), str(walking_speed), str(window_size))
		os.system(instruction)
		
		#instruction = "/cluster1/bin/Eqtl -m %s/qtlcart.map -z %s/qtlcart.z -e %s/Eqtlcart.log -o %s/qtlcart.eqt -S %s -A -V" %(tempdir, tempdir, tempdir, tempdir, str(threshold))
		#os.system(instruction)

		#####################
		# parse_eqt_file.py
		#####################
		
		infile = open(tempdir+"/qtlcart.z","r")
		outfile = open(tempdir+"/QTLs_total_parsed.txt","w")
		
		outfile.write("trait_name\ttrait_number\teQTL_number\tchr\tpeak_marker\tpeak_position\tpeak_LR\tpeak_LOD\tR2\tTR2\tS\tadditive\tdominance\n")
		
		for line in infile:
			l = line.strip().split()
			if line.startswith("-trait"):
				trait_num = l[1]
				trait_name = l[-1][1:-1]
				prev_lr_max = float(threshold)
				decrease = 0
				#print trait_num
				#print trait_name
				#print "---"
				eqtl_keep = ""
				eqtl_list = []
				prev_chromosome = 0
			try:
				ans = l[0]
			except:
				ans = ""
			if ans != "":
				if l[0].startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')):
					chromosome = l[0]
					chr_change = 0
					if chromosome != prev_chromosome:
						#print "NEW CHROMOSOME !!!!"
						chr_change = 1
						prev_chromosome = l[0]
					if chr_change == 1 and eqtl_keep != "":
						eqtl_list.append(eqtl_keep)
						#print "APPEND PREV"
						eqtl_keep = ""
						decrease = 0
						prev_lr_max = float(threshold)
						
					if float(l[3]) > float(threshold):
						#print line
						if float(l[3]) > prev_lr_max:
							eqtl_keep = line.strip()
							prev_lr_max = float(l[3])
							decrease = 0
							#print "KEEP"
						else:
							decrease += 1
							if decrease == 1:
								eqtl_list.append(eqtl_keep)
								#print "APPEND PREV"
								eqtl_keep = ""
								prev_lr_max = float(l[3])
							else:
								prev_lr_max = float(l[3])
					else:
						decrease += 1
						if decrease == 1 and eqtl_keep != "":
							eqtl_list.append(eqtl_keep)
							#print "APPEND PREV"
							eqtl_keep = ""
						decrease = 0
						prev_lr_max = float(threshold)
					#print decrease
					
			if line.startswith("-e"):
				if eqtl_keep != "":
					eqtl_list.append(eqtl_keep)
					#print "APPEND PREV"
					eqtl_keep = ""
					decrease = 0
					prev_lr_max = float(threshold)	
				#print "########"
				count = 0
				for eqtl in eqtl_list:
					e = eqtl.split()
					#print e
					count += 1
					index = str(count)
					chrom = str(e[0])
					marker = str(e[1])
					pos = str(e[2])
					lr = str(e[3])
					lod = str(0.217*float(e[3]))
					additive = str(e[6])
					dominance = str(e[8])
					r2 = str(e[4])
					tr2 = str(e[5])
					s = str(e[7])
					outfile.write(trait_name+"\t"+trait_num+"\t"+index+"\t"+chrom+"\t"+marker+"\t"+pos+"\t"+lr+"\t"+lod+"\t"+r2+"\t"+tr2+"\t"+s+"\t"+additive+"\t"+dominance+"\n")
				#print len(eqtl_list)
		infile.close()
		outfile.close()
		
		#########################
		# calc_LOD_intervals.py
		#########################
		outfile = open(tempdir+"/QTLs_total_parsed_LOD.txt","w")
		eqtfile = open(tempdir+"/QTLs_total_parsed.txt","r")
		
		eQTL_dict = {}
		eQTL_id = 0
		for line in eqtfile:
		    l = line.strip().split("\t")
		    if (l[0] != "trait_name"):
			eQTL_id += 1
			eQTL_dict[eQTL_id] = [l[0],l[3],l[4],l[5],l[7]]
		eqtfile.close()
		#print eQTL_dict
		
		for i in range(1,len(eQTL_dict)+1):
		    ans = []
		    
		    eqt_eQTL = eQTL_dict[i][0]
		    eqt_c = eQTL_dict[i][1]
		    eqt_m = eQTL_dict[i][2]
		    eqt_pos = eQTL_dict[i][3]
		    eqt_lod = eQTL_dict[i][4]
		    
		    z_eQTL = ""
		    z_id = 0
		    z_dict = {}
		    peak_id = 0
		    
		    zfile = open(tempdir+"/qtlcart.z","r")
		    for line in zfile:
			if line.startswith("-trait"):
			    l = line.strip().split("[")
			    z_eQTL = l[1][:-1]
			    #print "z_eQTL = "+z_eQTL
			    #print "eqt_eQTL = "+eqt_eQTL
			if (eqt_eQTL == z_eQTL) and (line.strip().startswith(eqt_c)):
			    if str(line.strip().split()[0]) == str(eqt_c):
				l = line.strip().split()
				z_id += 1
				z_c = l[0]
				z_m = l[1]
				z_pos = l[2]
				z_lod = float(l[3])*0.217
				z_dict[z_id] = [z_c, z_m, z_pos, z_lod]
				if float(eqt_pos) == float(z_pos):
				    peak_id = z_id
		    zfile.close()
		    
		    lod1 = float(eqt_lod)-1
		    lod2 = float(eqt_lod)-2
		    
		    # lod1_L calculation
		    #print "==== lod1_L"
		    subtract = 1
		    left_id = peak_id-1
		    if left_id != 0:
			#print "peak_id-subtract "+ str(peak_id-subtract)
			#print "left_id "+str(left_id)
			while (z_dict[peak_id-subtract][3] > lod1) and (left_id > 1):
			    subtract += 1
			    left_id = peak_id-subtract
		
			s1 = float(z_dict[left_id][2])  # small position
			s2 = float(z_dict[left_id][3])  # small lod
			l1 = float(z_dict[left_id+1][2])    # large position
			l2 = float(z_dict[left_id+1][3])    #  large lod
			
			if (l2-s2) != 0:
				lod1_L_pos = round(((lod1-s2)*(l1-s1)/(l2-s2))+s1,4)
			else:
				lod1_L_pos = round(((lod1-s2)*(l1-s1)/(0.00001))+s1,4)
			lod1_L_m = z_dict[left_id][1]
			if lod1_L_pos < 0:
			    lod1_L_pos = z_dict[1][2]
			    lod1_L_m = z_dict[1][1]
			if ((lod1_L_pos >= s1) and (lod1_L_pos <= l1)):
			    pass
			else:
			    lod1_L_pos = z_dict[1][2]
			    lod1_L_m = z_dict[1][1]    
		    else:
			lod1_L_pos = z_dict[1][2]
			lod1_L_m = z_dict[1][1]
		    ans.append(lod1_L_m)
		    ans.append(lod1_L_pos)
		    
		    # lod1_R calculation
		    #print "==== lod1_R"
		    add = 1
		    right_id = peak_id+1
		    if right_id <= max(z_dict.keys()):
			while (z_dict[peak_id+add][3] > lod1) and (right_id < max(z_dict.keys())):
			    add += 1
			    right_id = peak_id+add
			    
			s1 = float(z_dict[right_id-1][2])  # small position
			s2 = float(z_dict[right_id-1][3])  # small lod
			l1 = float(z_dict[right_id][2])    # large position
			l2 = float(z_dict[right_id][3])    #  large lod

			if (l2-s2) != 0:
				lod1_R_pos = round(((lod1-s2)*(l1-s1)/(l2-s2))+s1,4)
			else:
				lod1_R_pos = round(((lod1-s2)*(l1-s1)/(0.00001))+s1,4)
			lod1_R_m = z_dict[right_id-1][1]

			if lod1_R_pos > float(z_dict[max(z_dict.keys())][2]):
			    lod1_pos = z_dict[1][2]
			    #lod1_m = z_dict[2][1]
			    #lod1_R_pos = z_dict[max(z_dict.keys())][2]
			    lod1_R_m = z_dict[max(z_dict.keys())][1]
			if ((lod1_R_pos >= s1) and (lod1_R_pos <= l1)):
			    pass
			else:
			    lod1_R_pos = z_dict[max(z_dict.keys())][2]
			    lod1_R_m = z_dict[max(z_dict.keys())][1]
		    else:
			lod1_R_pos = z_dict[max(z_dict.keys())][2]
			lod1_R_m = z_dict[max(z_dict.keys())][1]

		    ans.append(lod1_R_m)
		    ans.append(lod1_R_pos)
		    
		    # lod2_L calculation
		    #print "==== lod2_L"
		    subtract = 1
		    left_id = peak_id-1
		    if left_id != 0 :
			while (z_dict[peak_id-subtract][3] > lod2) and (left_id > 1):
			    subtract += 1
			    left_id = peak_id-subtract
			
			s1 = float(z_dict[left_id][2])  # small position
			s2 = float(z_dict[left_id][3])  # small lod
			l1 = float(z_dict[left_id+1][2])    # large position
			l2 = float(z_dict[left_id+1][3])    #  large lod
			
			if (l2-s2) != 0:
				lod2_L_pos = round(((lod2-s2)*(l1-s1)/(l2-s2))+s1,4)
			else:
				lod2_L_pos = round(((lod2-s2)*(l1-s1)/(0.00001))+s1,4)
			
			lod2_L_m = z_dict[left_id][1]
			if lod2_L_pos < 0:
			    lod2_L_pos = z_dict[1][2]
			    lod2_L_m = z_dict[2][1]
			if ((lod2_L_pos >= s1) and (lod2_L_pos <= l1)):
			    pass
			else:
			    lod2_L_pos = z_dict[1][2]
			    lod2_L_m = z_dict[1][1]      
		    else:
			lod2_L_pos = z_dict[1][2]
			lod2_L_m = z_dict[1][1]
		    ans.append(lod2_L_m)
		    ans.append(lod2_L_pos)
		    
		    
		    # lod2_R calculation
		    #print "==== lod2_R"
		    add = 1
		    right_id = peak_id+1
		    if right_id <= max(z_dict.keys()):
			while (z_dict[peak_id+add][3] > lod2) and (right_id < max(z_dict.keys())):
			    add += 1
			    right_id = peak_id+add
			
			s1 = float(z_dict[right_id-1][2])  # small position
			s2 = float(z_dict[right_id-1][3])  # small lod
			l1 = float(z_dict[right_id][2])    # large position
			l2 = float(z_dict[right_id][3])    #  large lod
			
			if (l2-s2) != 0:
				lod2_R_pos = round(((lod2-s2)*(l1-s1)/(l2-s2))+s1,4)
			else:
				lod2_R_pos = round(((lod2-s2)*(l1-s1)/(0.00001))+s1,4)
			lod2_R_m = z_dict[right_id-1][1]

			if lod2_R_pos > float(z_dict[max(z_dict.keys())][2]):
			    lod2_R_pos = z_dict[max(z_dict.keys())][2]
			    lod2_R_m = z_dict[max(z_dict.keys())][1]
			if ((lod2_R_pos >= s1) and (lod2_R_pos <= l1)):
			    pass
			else:
			    lod2_R_pos = z_dict[max(z_dict.keys())][2]
			    lod2_R_m = z_dict[max(z_dict.keys())][1]
		    else:
			lod2_R_pos = z_dict[max(z_dict.keys())][2]
			lod2_R_m = z_dict[max(z_dict.keys())][1]
		    ans.append(lod2_R_m)
		    ans.append(lod2_R_pos)
		    
		    eQTL_dict[i].append(ans)
		    
		head = ["LOD1_L_m","LOD1_L_pos","LOD1_R_m","LOD1_R_pos","LOD2_L_m","LOD2_L_pos","LOD2_R_m","LOD2_R_pos"]
		
		i = -1
		eqtfile = open(tempdir+"/QTLs_total_parsed.txt","r")
		for line in eqtfile:
		    i += 1
		    if i == 0:
			outfile.write(line.strip()+"\t"+"\t".join(head)+"\n")
		    else: 
			outfile.write(line.strip()+"\t"+"\t".join(str(x) for x in eQTL_dict[i][5])+"\n")
		    
		eqtfile.close()
		outfile.close()
		
		#####################
		# merge_QTLs.py
		#####################
		
		infile = open(tempdir+"/QTLs_total_parsed_LOD.txt","r")
		outfile = open(tempdir+"/QTLs_total_parsed_LOD_merged.txt","w")
		
		test = ""
		for line in infile:
		    l = line.strip().split("\t")
		    if l[0] == "trait_name":
			keep = l
		    else:
			test = l
			if keep[0] != test[0]: # if trait names are not the same ---> write keep
				if keep[0] != "trait_name": # no not include header, only when integrate
					outfile.write("\t".join(keep)+"\n")
				keep = test
			else:
			    if keep[3] != test[3]: # if chrs are not the same --> write keep 
				outfile.write("\t".join(keep)+"\n")
				keep = test
			    else:
				if not (((float(keep[11])<0) and (float(test[11])<0)) or ((float(keep[11])>0) and (float(test[11])>0))):  # if additive signs are not the same --> write keep 
				    outfile.write("\t".join(keep)+"\n")
				    keep = test
				else:
				    #print str((abs(float(test[5])*100-float(keep[5])*100)))
				    if not (abs(float(test[5])*100-float(keep[5])*100)<float(minimum_cM_between_QTL)): #if minimum cM between QTLs > 20 --> write keep
					outfile.write("\t".join(keep)+"\n")
					keep = test
				    else:
					if (float(keep[7])<float(test[7])): #if LOD(keep) > LOD(test) --> new keep
					    keep = test
		
		if l[0] != "trait_name":
			outfile.write("\t".join(keep)+"\n")
					
		infile.close()
		outfile.close()
				
						
		os.system("mv %s/QTLs_total_parsed_LOD_merged.txt %s" %(tempdir,options.output1))
	
if __name__=="__main__": 
    __main__()