view split_15/split7_15nodes.py @ 0:1cbba720061a draft

Uploaded
author nanettec
date Fri, 18 Mar 2016 05:48:30 -0400
parents
children
line wrap: on
line source

"""
@summary: SPLITTER
@version 7

"""
# SPLITTER --> into 15 files

# Input: etraits.txt
#	 otraits.txt
#        phenotypeGenotype.inp

# Output: parameters.txt
#	  qtlcart1.inp
#         qtlcart2.inp
#         qtlcart3.inp
#         qtlcart4.inp
#         qtlcart5.inp

import optparse, sys
import tempfile

def stop_err( msg ):
	sys.stderr.write( "%s\n" % msg )
	sys.exit()
    
def __main__():
	parser = optparse.OptionParser()
	# input files: 
	parser.add_option("-i", "--input1", default=None, dest="input1", 
			  help="expression traits (etraits.txt) file")
	parser.add_option("-d", "--input2", default=None, dest="input2", 
			  help="other traits (otraits.txt) file")
	parser.add_option("-j", "--input3", default=None, dest="input3", 
			  help="phenotypeGenotype.inp file")

	# parameters: 
	parser.add_option("-k", "--SRmodel", default=2, dest="SRmodel", 
                      help="Select the stepwise regression model to search for QTLs: 0=SF, 1=EB, 2=FB (default)")
	parser.add_option("-l", "--Zmodel", default=6, dest="Zmodel", 
                      help="Select interval mapping (3) or composite interval mapping (6, default)")
	parser.add_option("-b", "--threshold", type="float", default=11.5, dest="threshold", 
	              help="Select a LR threshold (default=11.5)")
	parser.add_option("-c", "--walking_speed", type="float", default=2.0, dest="walking_speed", 
	              help="Select a walking speed in cM (default=2.0)")
	parser.add_option("-f", "--window_size", type="float", default=10.0, dest="window_size", 
	              help="Select a window size in cM (default=10)")
	parser.add_option("-g", "--minimum_cM_between_QTL", type="float", default=20.0, dest="minimum_cM_between_QTL", 
	              help="Select a minimum cM between QTLs (default=20)")
	
	# output files: 
	parser.add_option("--oa", "--output1", default=None, dest="output1", 
			  help="qtlcart1.inp file")
	parser.add_option("--ob", "--output2", default=None, dest="output2", 
			  help="qtlcart2.inp file")
	parser.add_option("--oc", "--output3", default=None, dest="output3", 
			  help="qtlcart3.inp file")
	parser.add_option("--od", "--output4", default=None, dest="output4", 
			  help="qtlcart4.inp file")
	parser.add_option("--oe", "--output5", default=None, dest="output5", 
			  help="qtlcart5.inp file")
	parser.add_option("--of", "--output6", default=None, dest="output6", 
			  help="qtlcart6.inp file")
	parser.add_option("--og", "--output7", default=None, dest="output7", 
			  help="qtlcart7.inp file")
	parser.add_option("--oh", "--output8", default=None, dest="output8", 
			  help="qtlcart8.inp file")
	parser.add_option("--oi", "--output9", default=None, dest="output9", 
			  help="qtlcart9.inp file")
	parser.add_option("--oj", "--output10", default=None, dest="output10", 
			  help="qtlcart10.inp file")	
	parser.add_option("--ok", "--output11", default=None, dest="output11", 
			  help="qtlcart11.inp file")
	parser.add_option("--ol", "--output12", default=None, dest="output12", 
			  help="qtlcart12.inp file")
	parser.add_option("--om", "--output13", default=None, dest="output13", 
			  help="qtlcart13.inp file")
	parser.add_option("--on", "--output14", default=None, dest="output14", 
			  help="qtlcart14.inp file")
	parser.add_option("--oo", "--output15", default=None, dest="output15", 
			  help="qtlcart15.inp file")
	
	parser.add_option("--op", "--output16", default=None, dest="output16", 
			  help="parameters.txt file")
	(options, args) = parser.parse_args()
	
	# input files
	try:
	    open(options.input1, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the expression traits (etraits.txt) file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the expression traits (etraits.txt) file:\n" + str(e))

	try:
	    open(options.input2, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the other traits (otraits.txt) file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the other traits (otraits.txt) file:\n" + str(e))
	    
	try:
	    open(options.input3, "r").close()
	except TypeError, e:
	    stop_err("You need to supply the phenotypeGenotype.inp file:\n" + str(e))
	except IOError, e:
	    stop_err("Can not open the phenotypeGenotype.inp file:\n" + str(e))

	# parameters
	try:
	    options.SRmodel = int(options.SRmodel)
        except TypeError, e:
            stop_err("Not an integer, 0, 1 or 2 can be selected (0=SF, 1=EB, 2=FB):\n" + str(e))
        
	try:
	    options.Zmodel = int(options.Zmodel)
        except TypeError, e:
            stop_err("Not an integer, 3 or 6 can be selected (3=IM, 6=CIM):\n" + str(e))
	
	try:
            options.threshold = float(options.threshold)
        except TypeError, e:
            stop_err("Not a float, provide a LR threshold:\n" + str(e))
	
	try:
            options.walking_speed = float(options.walking_speed)
        except TypeError, e:
            stop_err("Not a float, provide a walking speed in cM:\n" + str(e))
	
	try:
            options.threshold = float(options.threshold)
        except TypeError, e:
            stop_err("Not a float, provide a LR threshold:\n" + str(e))
	
	try:
            options.window_size = float(options.window_size)
        except TypeError, e:
            stop_err("Not a float, provide a window size in cM:\n" + str(e))
	    
	try:
            options.minimum_cM_between_QTL = float(options.minimum_cM_between_QTL)
        except TypeError, e:
            stop_err("Not a float, select a minimum cM between QTLs:\n" + str(e))

	######################################################################
	# phenotype_evaluation.py
	######################################################################
	
	import subprocess
	import os
	
	# Create temp direcotry
	tempdir = tempfile.mkdtemp()
	#print tempdir
	
	# copy INPUT file to the temp directory
	s = "cp %s %s/etraits.txt" %(options.input1, tempdir)
	subprocess.call(s, shell=True)
	s = "cp %s %s/otraits.txt" %(options.input2, tempdir)
	subprocess.call(s, shell=True)
	s = "cp %s %s/phenotypeGenotype.inp" %(options.input3, tempdir)
	subprocess.call(s, shell=True)
	
	
	sr = options.SRmodel
	z = options.Zmodel
	thr = options.threshold
	ws = options.walking_speed
	win_size = options.window_size
	dist_QTL = options.minimum_cM_between_QTL
	
	############################
        # count number of traits in etraits.txt
        ############################
        
	nodes = 15 # change only this
	
        etraits = open(tempdir+"/etraits.txt","r")	
        traits_dict = {}
        traits_names = []
        tot_traits = 0
        for line in etraits:
            l = line.strip().split("\t")
            tot_traits += 1
            if tot_traits == 1:
                samplesize = len(l)-1
            traits_dict[l[0]]=line.strip()
            traits_names.append(l[0])
        etraits.close()
        
        # make list with numbers per node
        num_traits_per_node = []
        
        if tot_traits < nodes:
            empty = nodes-tot_traits
            non_empty = nodes-empty
            base = 1
            for i in range(non_empty):
                num_traits_per_node.append(base)
            for j in range(empty):
                num_traits_per_node.append(0)
        else:
            base = tot_traits/nodes
            remaining = tot_traits%nodes
            for i in range(nodes):
                num_traits_per_node.append(base)
            for j in range(remaining):
                num_traits_per_node[j] = num_traits_per_node[j]+1
	
	############################
        # count number of traits in otraits.txt
        ############################

        otraits = open(tempdir+"/otraits.txt","r")	
        otraits_dict = {}
        otraits_names = []
        tot_otraits = 0
        for line in otraits:
            l = line.strip().split("\t")
            tot_otraits += 1
            if tot_otraits == 1:
                samplesize_otraits = len(l)-1
            otraits_dict[l[0]]=line.strip()
            otraits_names.append(l[0])
        etraits.close()
        #print "otraits samplesize = "+str(samplesize_otraits)
	#print "etraits samplesize = "+str(samplesize)   
	
	if (tot_otraits != 0) and (samplesize_otraits == samplesize):
		add_otraits = "yes"
	else:
		add_otraits = "no"
		tot_otraits = 0
	#print "add_otraits "+str(add_otraits)	
	#print "tot_otraits "+str(tot_otraits)
	
        ############################
        # create header and footer files
        ############################
        
        f = open(tempdir+"/phenotypeGenotype.inp","r")	
        line=f.readline()
        
	hdr = []
        hold="NULL"
        # retrieve header of cross.inp
        while hold !="TRUE":
            # switch status adjustment
            if line[0:13]=="-start traits":
                hold="TRUE"
            hdr.append(line)
	    if line[0:13] !="-start traits":
                line=f.readline()
	
        foot = []
	foot.append("-stop traits\n")
	foot.append("-quit\n")
	foot.append("-end\n") 
                
	foot = []
	if (add_otraits == "yes"):
		foot.append("-stop traits\n")
		foot.append("-start otraits\n")
		for k in otraits_dict.keys():
			otraits_dict[k]
			foot.append("+"+otraits_dict[k]+"\n")
		foot.append("-stop otraits\n")
		foot.append("-quit\n")
		foot.append("-end\n")
	else:
		foot.append("-stop traits\n")
		foot.append("-quit\n")
		foot.append("-end\n")
	#print foot
	
        ############################
        # create output files
        ############################
                
        done = 0
        c = -1
        for i in range(nodes):
            c += 1
	    for j in range(10):
		if hdr[j].split(" ")[0] == "-SampleSize":
		    hdr[j]=hdr[j][0:12]+str(samplesize)+"\n"
		if hdr[j].split(" ")[0] == "-traits":	
		    hdr[j]=hdr[j][0:8]+str(num_traits_per_node[c])+"\n"
		if hdr[j].split(" ")[0] == "-otraits":		
		    hdr[j]=hdr[j][0:9]+str(tot_otraits)+"\n"
		    
            f=open(tempdir+"/qtlcart"+str(c)+".inp","w")
            for i in hdr:
                f.write(i)
            for j in range(num_traits_per_node[c]):
                j += done
                f.write(traits_dict[traits_names[j]]+"\n")
            done += num_traits_per_node[c]
            for k in foot:
                f.write(k)
        f.close()
	
	paramters_file=open(tempdir+"/parameters.txt", "w")
	paramters_file.write("SRmodel\t"+str(sr)+"\nZmodel\t"+str(z)+"\nthreshold\t"+str(thr)+"\nwalking_speed\t"+str(ws)+"\nwindow_size\t"+str(win_size)+"\nminimum_cM_between_QTL\t"+str(dist_QTL)+"\n")
	paramters_file.close()
	
	os.system("mv %s/qtlcart0.inp %s" %(tempdir,options.output1))
	os.system("mv %s/qtlcart1.inp %s" %(tempdir,options.output2))
	os.system("mv %s/qtlcart2.inp %s" %(tempdir,options.output3))
	os.system("mv %s/qtlcart3.inp %s" %(tempdir,options.output4))
	os.system("mv %s/qtlcart4.inp %s" %(tempdir,options.output5))
	os.system("mv %s/qtlcart5.inp %s" %(tempdir,options.output6))
	os.system("mv %s/qtlcart6.inp %s" %(tempdir,options.output7))
	os.system("mv %s/qtlcart7.inp %s" %(tempdir,options.output8))
	os.system("mv %s/qtlcart8.inp %s" %(tempdir,options.output9))
	os.system("mv %s/qtlcart9.inp %s" %(tempdir,options.output10))
	os.system("mv %s/qtlcart10.inp %s" %(tempdir,options.output11))
	os.system("mv %s/qtlcart11.inp %s" %(tempdir,options.output12))
	os.system("mv %s/qtlcart12.inp %s" %(tempdir,options.output13))
	os.system("mv %s/qtlcart13.inp %s" %(tempdir,options.output14))
	os.system("mv %s/qtlcart14.inp %s" %(tempdir,options.output15))
	
	os.system("mv %s/parameters.txt %s" %(tempdir,options.output16))
	
	######################################################################
	
if __name__=="__main__": 
    __main__()