Mercurial > repos > nanettec > split_15
view split_15/split7_15nodes.py @ 0:1cbba720061a draft
Uploaded
author | nanettec |
---|---|
date | Fri, 18 Mar 2016 05:48:30 -0400 |
parents | |
children |
line wrap: on
line source
""" @summary: SPLITTER @version 7 """ # SPLITTER --> into 15 files # Input: etraits.txt # otraits.txt # phenotypeGenotype.inp # Output: parameters.txt # qtlcart1.inp # qtlcart2.inp # qtlcart3.inp # qtlcart4.inp # qtlcart5.inp import optparse, sys import tempfile def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() def __main__(): parser = optparse.OptionParser() # input files: parser.add_option("-i", "--input1", default=None, dest="input1", help="expression traits (etraits.txt) file") parser.add_option("-d", "--input2", default=None, dest="input2", help="other traits (otraits.txt) file") parser.add_option("-j", "--input3", default=None, dest="input3", help="phenotypeGenotype.inp file") # parameters: parser.add_option("-k", "--SRmodel", default=2, dest="SRmodel", help="Select the stepwise regression model to search for QTLs: 0=SF, 1=EB, 2=FB (default)") parser.add_option("-l", "--Zmodel", default=6, dest="Zmodel", help="Select interval mapping (3) or composite interval mapping (6, default)") parser.add_option("-b", "--threshold", type="float", default=11.5, dest="threshold", help="Select a LR threshold (default=11.5)") parser.add_option("-c", "--walking_speed", type="float", default=2.0, dest="walking_speed", help="Select a walking speed in cM (default=2.0)") parser.add_option("-f", "--window_size", type="float", default=10.0, dest="window_size", help="Select a window size in cM (default=10)") parser.add_option("-g", "--minimum_cM_between_QTL", type="float", default=20.0, dest="minimum_cM_between_QTL", help="Select a minimum cM between QTLs (default=20)") # output files: parser.add_option("--oa", "--output1", default=None, dest="output1", help="qtlcart1.inp file") parser.add_option("--ob", "--output2", default=None, dest="output2", help="qtlcart2.inp file") parser.add_option("--oc", "--output3", default=None, dest="output3", help="qtlcart3.inp file") parser.add_option("--od", "--output4", default=None, dest="output4", help="qtlcart4.inp file") parser.add_option("--oe", "--output5", default=None, dest="output5", help="qtlcart5.inp file") parser.add_option("--of", "--output6", default=None, dest="output6", help="qtlcart6.inp file") parser.add_option("--og", "--output7", default=None, dest="output7", help="qtlcart7.inp file") parser.add_option("--oh", "--output8", default=None, dest="output8", help="qtlcart8.inp file") parser.add_option("--oi", "--output9", default=None, dest="output9", help="qtlcart9.inp file") parser.add_option("--oj", "--output10", default=None, dest="output10", help="qtlcart10.inp file") parser.add_option("--ok", "--output11", default=None, dest="output11", help="qtlcart11.inp file") parser.add_option("--ol", "--output12", default=None, dest="output12", help="qtlcart12.inp file") parser.add_option("--om", "--output13", default=None, dest="output13", help="qtlcart13.inp file") parser.add_option("--on", "--output14", default=None, dest="output14", help="qtlcart14.inp file") parser.add_option("--oo", "--output15", default=None, dest="output15", help="qtlcart15.inp file") parser.add_option("--op", "--output16", default=None, dest="output16", help="parameters.txt file") (options, args) = parser.parse_args() # input files try: open(options.input1, "r").close() except TypeError, e: stop_err("You need to supply the expression traits (etraits.txt) file:\n" + str(e)) except IOError, e: stop_err("Can not open the expression traits (etraits.txt) file:\n" + str(e)) try: open(options.input2, "r").close() except TypeError, e: stop_err("You need to supply the other traits (otraits.txt) file:\n" + str(e)) except IOError, e: stop_err("Can not open the other traits (otraits.txt) file:\n" + str(e)) try: open(options.input3, "r").close() except TypeError, e: stop_err("You need to supply the phenotypeGenotype.inp file:\n" + str(e)) except IOError, e: stop_err("Can not open the phenotypeGenotype.inp file:\n" + str(e)) # parameters try: options.SRmodel = int(options.SRmodel) except TypeError, e: stop_err("Not an integer, 0, 1 or 2 can be selected (0=SF, 1=EB, 2=FB):\n" + str(e)) try: options.Zmodel = int(options.Zmodel) except TypeError, e: stop_err("Not an integer, 3 or 6 can be selected (3=IM, 6=CIM):\n" + str(e)) try: options.threshold = float(options.threshold) except TypeError, e: stop_err("Not a float, provide a LR threshold:\n" + str(e)) try: options.walking_speed = float(options.walking_speed) except TypeError, e: stop_err("Not a float, provide a walking speed in cM:\n" + str(e)) try: options.threshold = float(options.threshold) except TypeError, e: stop_err("Not a float, provide a LR threshold:\n" + str(e)) try: options.window_size = float(options.window_size) except TypeError, e: stop_err("Not a float, provide a window size in cM:\n" + str(e)) try: options.minimum_cM_between_QTL = float(options.minimum_cM_between_QTL) except TypeError, e: stop_err("Not a float, select a minimum cM between QTLs:\n" + str(e)) ###################################################################### # phenotype_evaluation.py ###################################################################### import subprocess import os # Create temp direcotry tempdir = tempfile.mkdtemp() #print tempdir # copy INPUT file to the temp directory s = "cp %s %s/etraits.txt" %(options.input1, tempdir) subprocess.call(s, shell=True) s = "cp %s %s/otraits.txt" %(options.input2, tempdir) subprocess.call(s, shell=True) s = "cp %s %s/phenotypeGenotype.inp" %(options.input3, tempdir) subprocess.call(s, shell=True) sr = options.SRmodel z = options.Zmodel thr = options.threshold ws = options.walking_speed win_size = options.window_size dist_QTL = options.minimum_cM_between_QTL ############################ # count number of traits in etraits.txt ############################ nodes = 15 # change only this etraits = open(tempdir+"/etraits.txt","r") traits_dict = {} traits_names = [] tot_traits = 0 for line in etraits: l = line.strip().split("\t") tot_traits += 1 if tot_traits == 1: samplesize = len(l)-1 traits_dict[l[0]]=line.strip() traits_names.append(l[0]) etraits.close() # make list with numbers per node num_traits_per_node = [] if tot_traits < nodes: empty = nodes-tot_traits non_empty = nodes-empty base = 1 for i in range(non_empty): num_traits_per_node.append(base) for j in range(empty): num_traits_per_node.append(0) else: base = tot_traits/nodes remaining = tot_traits%nodes for i in range(nodes): num_traits_per_node.append(base) for j in range(remaining): num_traits_per_node[j] = num_traits_per_node[j]+1 ############################ # count number of traits in otraits.txt ############################ otraits = open(tempdir+"/otraits.txt","r") otraits_dict = {} otraits_names = [] tot_otraits = 0 for line in otraits: l = line.strip().split("\t") tot_otraits += 1 if tot_otraits == 1: samplesize_otraits = len(l)-1 otraits_dict[l[0]]=line.strip() otraits_names.append(l[0]) etraits.close() #print "otraits samplesize = "+str(samplesize_otraits) #print "etraits samplesize = "+str(samplesize) if (tot_otraits != 0) and (samplesize_otraits == samplesize): add_otraits = "yes" else: add_otraits = "no" tot_otraits = 0 #print "add_otraits "+str(add_otraits) #print "tot_otraits "+str(tot_otraits) ############################ # create header and footer files ############################ f = open(tempdir+"/phenotypeGenotype.inp","r") line=f.readline() hdr = [] hold="NULL" # retrieve header of cross.inp while hold !="TRUE": # switch status adjustment if line[0:13]=="-start traits": hold="TRUE" hdr.append(line) if line[0:13] !="-start traits": line=f.readline() foot = [] foot.append("-stop traits\n") foot.append("-quit\n") foot.append("-end\n") foot = [] if (add_otraits == "yes"): foot.append("-stop traits\n") foot.append("-start otraits\n") for k in otraits_dict.keys(): otraits_dict[k] foot.append("+"+otraits_dict[k]+"\n") foot.append("-stop otraits\n") foot.append("-quit\n") foot.append("-end\n") else: foot.append("-stop traits\n") foot.append("-quit\n") foot.append("-end\n") #print foot ############################ # create output files ############################ done = 0 c = -1 for i in range(nodes): c += 1 for j in range(10): if hdr[j].split(" ")[0] == "-SampleSize": hdr[j]=hdr[j][0:12]+str(samplesize)+"\n" if hdr[j].split(" ")[0] == "-traits": hdr[j]=hdr[j][0:8]+str(num_traits_per_node[c])+"\n" if hdr[j].split(" ")[0] == "-otraits": hdr[j]=hdr[j][0:9]+str(tot_otraits)+"\n" f=open(tempdir+"/qtlcart"+str(c)+".inp","w") for i in hdr: f.write(i) for j in range(num_traits_per_node[c]): j += done f.write(traits_dict[traits_names[j]]+"\n") done += num_traits_per_node[c] for k in foot: f.write(k) f.close() paramters_file=open(tempdir+"/parameters.txt", "w") paramters_file.write("SRmodel\t"+str(sr)+"\nZmodel\t"+str(z)+"\nthreshold\t"+str(thr)+"\nwalking_speed\t"+str(ws)+"\nwindow_size\t"+str(win_size)+"\nminimum_cM_between_QTL\t"+str(dist_QTL)+"\n") paramters_file.close() os.system("mv %s/qtlcart0.inp %s" %(tempdir,options.output1)) os.system("mv %s/qtlcart1.inp %s" %(tempdir,options.output2)) os.system("mv %s/qtlcart2.inp %s" %(tempdir,options.output3)) os.system("mv %s/qtlcart3.inp %s" %(tempdir,options.output4)) os.system("mv %s/qtlcart4.inp %s" %(tempdir,options.output5)) os.system("mv %s/qtlcart5.inp %s" %(tempdir,options.output6)) os.system("mv %s/qtlcart6.inp %s" %(tempdir,options.output7)) os.system("mv %s/qtlcart7.inp %s" %(tempdir,options.output8)) os.system("mv %s/qtlcart8.inp %s" %(tempdir,options.output9)) os.system("mv %s/qtlcart9.inp %s" %(tempdir,options.output10)) os.system("mv %s/qtlcart10.inp %s" %(tempdir,options.output11)) os.system("mv %s/qtlcart11.inp %s" %(tempdir,options.output12)) os.system("mv %s/qtlcart12.inp %s" %(tempdir,options.output13)) os.system("mv %s/qtlcart13.inp %s" %(tempdir,options.output14)) os.system("mv %s/qtlcart14.inp %s" %(tempdir,options.output15)) os.system("mv %s/parameters.txt %s" %(tempdir,options.output16)) ###################################################################### if __name__=="__main__": __main__()