Mercurial > repos > nanettec > classifier
view classifier/classifier.py @ 1:d8547ff82697 draft default tip
Deleted selected files
author | nanettec |
---|---|
date | Fri, 18 Mar 2016 05:16:02 -0400 |
parents | ef9c2044d86a |
children |
line wrap: on
line source
""" @summary: Classify eQTLs as cis or trans - calculate "hybrid" overlap score @author: nanette.coetzer@gmail.com @version 5 """ import optparse, sys import tempfile import os, re import subprocess def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() def __main__(): #Parse Command Line parser = optparse.OptionParser() parser.add_option("-m", "--rscript", default=None, dest="rscript", help="R script") parser.add_option("-i", "--input1", default=None, dest="input1", help="eQTLs file") parser.add_option("-j", "--input2", default=None, dest="input2", help="Chr summary file") parser.add_option("-k", "--input3", default=None, dest="input3", help="Gene positions file") parser.add_option("-l", "--input4", default=None, dest="input4", help="Lookup file") #parser.add_option("-m", "--score_type", default=2, dest="score_type", # help="Select dM scale (1) or hybrid (2)") parser.add_option("-o", "--output1", default=None, dest="output1", help="All classification file") parser.add_option("-p", "--output2", default=None, dest="output2", help="Cis classification file") parser.add_option("-q", "--output3", default=None, dest="output3", help="Trans classification file") parser.add_option("-r", "--output4", default=None, dest="output4", help="Classification summary file") parser.add_option("-s", "--output5", default=None, dest="output5", help="Chr summary file v2") parser.add_option("-t", "--output6", default=None, dest="output6", help="Gene positions file v2") parser.add_option("-u", "--output7", default=None, dest="output7", help="eQTLs per gene summary file") parser.add_option("-v", "--output8", default=None, dest="output8", help="eQTL vs gene position plot") (options, args) = parser.parse_args() try: open(options.input1, "r").close() except TypeError, e: stop_err("You need to supply the eQTL file:\n" + str(e)) except IOError, e: stop_err("Can not open the eQTL file:\n" + str(e)) try: open(options.input2, "r").close() except TypeError, e: stop_err("You need to supply the Chr summary file:\n" + str(e)) except IOError, e: stop_err("Can not open the Chr summary file:\n" + str(e)) try: open(options.input3, "r").close() except TypeError, e: stop_err("You need to supply the Gene positions file:\n" + str(e)) except IOError, e: stop_err("Can not open the Gene positions file:\n" + str(e)) try: open(options.input4, "r").close() except TypeError, e: stop_err("You need to supply the Lookup file:\n" + str(e)) except IOError, e: stop_err("Can not open the Lookup file:\n" + str(e)) ############################################## lookup_list = [] # Convert eQTL input file, change marker number of last interval in each chromosome check2 = "" prev_m = "0" prev_c = "1" r = re.compile("^(\d+)\s+(\d+)\s+(\d+.\d+)\s+") indict = {} max_cM_dict = {} lookup = open(options.input4,'r') for line in lookup: l = line.strip().split("\t") if l[6] == "0": #print l max_cM_dict[l[1]] = l[4] lookup_list.append(l) if l[0] != "id" and l[6] != "0": c = l[1] if prev_c != c: indict[prev_c+"\t"+str(float(p))] = prev_c+"\t"+m+"\t"+str(float(p)) m = l[2] p = l[3] prev_c = c #print max_cM_dict #add last line of last chromosome indict[prev_c+"\t"+p] = prev_c+"\t"+m+"\t"+p #print indict temp_file = tempfile.mktemp() eqtl_temp_output = open(temp_file,'w') eQTL_size = 0 eQTL_nr = 0 eqtl_in = open(options.input1,'r') for line in eqtl_in: l = line.strip().split("\t") if l[3].startswith("c"): header = 1 # yes else: header = 0 # no if header != 1: sfull = l[3]+"\t"+l[13]+"\t"+str(float(l[14])) efull = l[3]+"\t"+l[15]+"\t"+str(float(l[16])) pfull = l[3]+"\t"+l[4]+"\t"+str(float(l[5])) sshort = l[3]+"\t"+str(float(l[14])) eshort = l[3]+"\t"+str(float(l[16])) pshort = l[3]+"\t"+str(float(l[5])) if sshort in indict.keys(): sfull = indict[sshort] if eshort in indict.keys(): efull = indict[eshort] if pshort in indict.keys(): pfull = indict[pshort] s = sfull.split("\t") e = efull.split("\t") p = pfull.split("\t") eqtl_temp_output.write(l[0]+"\t"+l[1]+"\t"+s[0]+"\t"+s[1]+"\t"+s[2]+"\t"+e[1]+"\t"+e[2]+"\t"+p[1]+"\t"+p[2]+"\t"+l[6]+"\t"+l[8]+"\t"+l[9]+"\t"+l[11]+"\n") #print l[0]+"\t"+l[1]+"\t"+s[0]+"\t"+s[1]+"\t"+s[2]+"\t"+e[1]+"\t"+e[2]+"\t"+p[1]+"\t"+p[2]+"\t"+l[6]+"\t"+l[8]+"\t"+l[9]+"\t"+l[11]+"\n" eQTL_size += (float(l[16])*100)-(float(l[14])*100) eQTL_nr += 1 lookup.close() eqtl_in.close() eqtl_temp_output.close() cutoff = (eQTL_size/float(eQTL_nr))/2 #print "cutoff: ",cutoff ############################################# cis = 0 trans = 0 no_result = 0 total = 0 cis_peakLR = 0 cis_rsq = 0 cis_rtsq = 0 trans_peakLR = 0 trans_rsq = 0 trans_rtsq = 0 nr_peakLR = 0 nr_rsq = 0 nr_rtsq = 0 tot_peakLR = 0 tot_rsq = 0 tot_rtsq = 0 # Create 2 dictionaries with gene info: gene_dict and gene_bin_dict gene_positions = open(options.input3,'r') # gene_pos.txt gene_dict = {} gene_bin_dict = {} for line in gene_positions: l = line.strip().split("\t") if l[0].startswith("gene") or l[0].startswith("Gene") or l[0].startswith("name") or l[0].startswith("Name"): header = 1 # yes else: header = 0 # no if header != 1: # create a gene dictionary form gene positions file gene_dict[l[0]] = l[1:]+[0,0,0,0] chr_gene = l[1] bp_gene = round((int(l[2])+int(l[3]))/2.0,0) # for each gene, get the correct bin from the lookup file # first read lookup file into a list of lists (lookup_list), faster gene_bin = "NA" #print lookup_list for lu in lookup_list: if (lu[1]==chr_gene) and (bp_gene>=int(float(lu[5]))) and (float(lu[6])!=0): gene_bin = lu[0] gene_bin_dict[l[0]]=gene_bin gene_positions.close() #print "Done 2" eqtl_outfile = open(options.output1,'w') # classify.txt eqtl_outfile.write("gene\tindex\tchr\tstart_marker\tstart_int\tend_marker\tend_int\tpeak_marker\tpeak_int\tpeakLR\trsq\trtsq\tadditive\tclassification\teQTL_bin\tgene_bin\toverlap_score\tstatus\n") max_peakLR = 0.0 eqtl_get_maxLR = open(temp_file,'r') for eqtlline in eqtl_get_maxLR: l = eqtlline.strip().split("\t") if l[1].startswith("i"): header = 1 # yes else: header = 0 # no if header != 1: if ((l[9] != 'inf') and (float(l[9]) > max_peakLR)): max_peakLR = float(l[9]) eqtl_get_maxLR.close() count = 0 position = 0 status = 0 x2 = "" eqtl = open(temp_file,'r') # eQTLs.txt for eqtlline in eqtl: classification = "" l = eqtlline.strip().split("\t") if l[1].startswith("i"): header = 1 # yes else: header = 0 # no if header != 1: gene = l[0] # for each eqtl, get the correct bin from the lookup file (lookup_list) eqtl_bin = "NA" for lu in lookup_list: if (lu[1]==l[2]) and (float(l[8])>=float(lu[3])) and (float(lu[6])!=0): eqtl_bin = lu[0] try: g = gene_dict[gene] position = 1 except: # if the position of the gene is not known, then eQTL cannot be classified classification = "no_result" overlap_score = 0 position = 0 full_partial = "." if position == 1: gene_chr = g[0] eqtl_chr = l[2] # if the gene and eQTL on different chromosomes, then eQTL classified as trans if gene_chr != eqtl_chr: classification = "trans" overlap_score = 0 full_partial = "." # if the gene and eQTL on the same chromosome, then test if cis / trans else: #print "else" gene_bp = int(round((int(g[1])+int(g[2]))/float(2),0)) prev_bp = 0 prev_line = [0,0,0,0.0,0.0,0] lookup = open(options.input4,'r') # lookup.txt status = 0 for line in lookup: look = line.strip().split("\t") if look[1] == gene_chr: # for the gene position in bp (v1), calculate the best estimate cM position (x2) # if gene is NOT to the left of the 1st marker (< 0 cM) or to the right of the last marker, x2 will be calculated and status will be 1 if (int(float(gene_bp)) > int(float(prev_bp))) and (int(float(gene_bp)) <= int(float(look[5]))): # 2 : cM and 1 : bp #print "gene_bp = "+str(gene_bp) #print "prev_bp = "+str(prev_bp) #print "look[5] = "+str(look[5]) v1 = int(gene_bp) s1 = int(prev_bp) l1 = int(float(look[5])) s2 = float(prev_line[4]) l2 = float(look[4]) x2 = round(((v1-s1)*(l2-s2)/(l1-s1))+s2,2) status = 1 #print "x2 = "+str(x2) prev_bp = look[5] prev_line = look lookup.close() #print "status = "+str(status)+"\n" if status == 0: #print "*********" * 100 #print "gene at end of chr\n\n\n" x2 = round(float(max_cM_dict[gene_chr]),2) #print x2 g_bin = "NA" # if the gene is to the left of the 1st marker (< 0 cM) or to the right of the last marker; this could be improved (for the maize data it is fine) #classification = "no_result" #print classification if classification != "if status == 0:": # get the eQTL start and end cM positions (from lookup table) #lookup = open(options.input4,'r') # lookup.txt #count = 0 #for line in lookup: # count += 1 # if count > 1: # look = line.strip().split("\t") # if int(look[1]) == int(eqtl_chr) and float(look[3]) == float(l[4]): # eqtl_start = float(look[4]) # if int(look[1]) == int(eqtl_chr) and float(look[3]) == float(l[6]): # eqtl_end = float(look[4]) #lookup.close() # compare calculated gene cM position (x2) with eQTL start and end cM positions # calculate a region around the gene (5 cM to either side) and test whether (i) eQTL start or (ii) eQTL end overlaps with that region; or (iii) whether the eQTL is entirely in that region eqtl_start = float(l[4])*100 eqtl_end = float(l[6])*100 eqtl_peak = float(l[8])*100 #cutoff = 5 if (int(gene_chr) == int(eqtl_chr)) and ((eqtl_start >= x2-float(cutoff) and eqtl_start <= x2+float(cutoff)) or (eqtl_end >= x2-float(cutoff) and eqtl_end <= x2+float(cutoff)) or (eqtl_start <= x2-float(cutoff) and eqtl_end >= x2+float(cutoff))): classification = "cis" #if options.score_type == str(1): # # calculate overlap score (dM score) # start_end_list = [float(eqtl_start/10), float(eqtl_end/10), (x2-float(cutoff))/10, (x2+float(cutoff))/10] # start_end_list.sort() # # ratio = ((start_end_list[2]-start_end_list[1])+1)/float((start_end_list[3]-start_end_list[0])+1) # diff_peaks = float(abs((x2/10)-float(eqtl_peak/10)))+1 # overlap_score = float(ratio/diff_peaks) #else: # # calculate overlap score (hybrid score: full overlap (both peaks in overlapping region) = M scale; partial overlap = dM scale) # # Example of scale: 0.0801 M = 0.801 dM = 8.01 cM start_end_list = [float(eqtl_start/10),float(eqtl_end/10),(x2-float(cutoff))/10, (x2+float(cutoff))/10] start_end_list.sort() ######### test for full / partial overlap ######### if ((float(eqtl_peak/10) >= start_end_list[1]) and (float(eqtl_peak/10) <= start_end_list[2])) \ and ((x2/10 >= start_end_list[1]) and (x2/10 <= start_end_list[2])): ratio = ((start_end_list[2]/10-start_end_list[1]/10)+1)/float((start_end_list[3]/10-start_end_list[0]/10)+1) diff_peaks = float(abs(x2/100-float(eqtl_peak/100)))+1 overlap_score = float(ratio/diff_peaks) full_partial = "full" else: # partial overlap --> use dM scale (multiply position by 10) --> already done ratio = ((start_end_list[2]-start_end_list[1])+1)/float((start_end_list[3]-start_end_list[0])+1) diff_peaks = float(abs(x2/10-float(eqtl_peak/10)))+1 overlap_score = float(ratio/diff_peaks) full_partial = "partial" else: classification = "trans" overlap_score = 0 full_partial = "." if classification == "cis": cis += 1 cis_peakLR += float(l[9]) cis_rsq += float(l[10]) cis_rtsq += float(l[11]) g[4] += 1 if classification == "trans": trans += 1 trans_peakLR += float(l[9]) trans_rsq += float(l[10]) trans_rtsq += float(l[11]) g[5] += 1 if classification == "no_result": no_result += 1 nr_peakLR += float(l[9]) nr_rsq += float(l[10]) nr_rtsq += float(l[11]) g[6] += 1 g[3] += 1 total += 1 tot_peakLR += float(l[9]) tot_rsq += float(l[10]) tot_rtsq += float(l[11]) try: g_bin = gene_bin_dict[l[0]] except: g_bin = "NA" if status == 0: #g_bin = "end c"+str(gene_chr) g_bin = "NA" gene_bin_dict[l[0]] = g_bin if x2 == 0.0: #g_bin = "start c"+str(gene_chr) g_bin = "NA" gene_bin_dict[l[0]] = g_bin if l[9] != "inf": eqtl_outfile.write(eqtlline.strip()+"\t"+classification+"\t"+eqtl_bin+"\t"+str(g_bin)+"\t"+str(overlap_score)+"\t"+str(full_partial)+"\n") else: #eqtl_outfile.write(eqtlline.strip()+"\t"+classification+"\t"+eqtl_bin+"\t"+str(g_bin)+"\n") temp_line = "\t".join(l[0:9])+"\t"+str(max_peakLR)+"\t"+"\t".join(l[10:]) eqtl_outfile.write(temp_line+"\t"+classification+"\t"+eqtl_bin+"\t"+str(g_bin)+"\t"+str(overlap_score)+"\t"+str(full_partial)+"\n") #print(eqtlline.strip()+"\t"+classification+"\t"+eqtl_bin+"\t"+str(g_bin)) eqtl.close() eqtl_outfile.close() # write cis and trans classification files cis_chr = [] trans_chr = [] all_chr = [] unknown_chr = [] all_outfile = open(options.output1,'r') # all classify.txt cis_outfile = open(options.output2,'w') # cis classify.txt cis_outfile.write("gene\tindex\tchr\tstart_marker\tstart_int\tend_marker\tend_int\tpeak_marker\tpeak_int\tpeakLR\trsq\trtsq\tparent_up_reg\tclassification\teQTL_bin\tgene_bin\toverlap_score\tstatus\n") trans_outfile = open(options.output3,'w') # trans classify.txt trans_outfile.write("gene\tindex\tchr\tstart_marker\tstart_int\tend_marker\tend_int\tpeak_marker\tpeak_int\tpeakLR\trsq\trtsq\tparent_up_reg\tclassification\teQTL_bin\tgene_bin\toverlap_score\tstatus\n") for allline in all_outfile: all_chr.append(allline.strip().split("\t")[2]) if allline.strip().split("\t")[13] == "cis": cis_outfile.write(allline.strip()+"\n") cis_chr.append(allline.strip().split("\t")[2]) if allline.strip().split("\t")[13] == "trans": trans_outfile.write(allline.strip()+"\n") trans_chr.append(allline.strip().split("\t")[2]) if allline.strip().split("\t")[13] == "no_result": unknown_chr.append(allline.strip().split("\t")[2]) cis_outfile.close() trans_outfile.close() all_outfile.close() # calcualte summary statistics p_cis = round((cis/float(total))*100,2) p_trans = round((trans/float(total))*100,2) p_no_result = round((no_result/float(total))*100,2) p_total = round((total/float(total))*100,2) mean_cis_peakLR = round(cis_peakLR/float(cis),1) mean_cis_rsq = round(cis_rsq/float(cis),2) mean_cis_rtsq = round(cis_rtsq/float(cis),2) mean_trans_peakLR = round(trans_peakLR/float(trans),1) mean_trans_rsq = round(trans_rsq/float(trans),2) mean_trans_rtsq = round(trans_rtsq/float(trans),2) if no_result != 0: mean_nr_peakLR = round(nr_peakLR/float(no_result),1) mean_nr_rsq = round(nr_rsq/float(no_result),2) mean_nr_rtsq = round(nr_rtsq/float(no_result),2) else: mean_nr_peakLR = "-" mean_nr_rsq = "-" mean_nr_rtsq = "-" mean_tot_peakLR = round(tot_peakLR/float(total),1) mean_tot_rsq = round(tot_rsq/float(total),2) mean_tot_rtsq = round(tot_rtsq/float(total),2) cis_summary = "cis\t"+str(cis)+"\t"+ str(p_cis)+"%\t"+str(mean_cis_peakLR)+"\t"+str(mean_cis_rsq)+"\t"+str(mean_cis_rtsq)+"\n" trans_summary = "trans\t"+str(trans)+"\t"+ str(p_trans)+"%\t"+str(mean_trans_peakLR)+"\t"+str(mean_trans_rsq)+"\t"+str(mean_trans_rtsq)+"\n" no_result_summary = "unknown\t"+str(no_result)+"\t"+ str(p_no_result)+"%\t"+str(mean_nr_peakLR)+"\t"+str(mean_nr_rsq)+"\t"+str(mean_nr_rtsq)+"\n" total_summary = "all\t"+str(total)+"\t"+ str(p_total)+"%\t"+str(mean_tot_peakLR)+"\t"+str(mean_tot_rsq)+"\t"+str(mean_tot_rtsq)+"\n" summary_outfile = open(options.output4,'w') # classify_summary.txt summary_outfile.write("class\tnumber_eQTLs\tpercentage_eQTLs\taverage_peakLR\taverage_rsq\taverage_rtsq\n") summary_outfile.write(cis_summary+trans_summary+no_result_summary+total_summary) summary_outfile.close() genes_chr = [] for g in gene_dict.keys(): genes_chr.append(gene_dict[g][0]) count_genes = {} for i in set(genes_chr): count_genes[i] = genes_chr.count(i) count_cis = {} for i in set(cis_chr): count_cis[i] = cis_chr.count(i) count_trans = {} for i in set(trans_chr): count_trans[i] = trans_chr.count(i) count_all = {} for i in set(all_chr): count_all[i] = all_chr.count(i) count_unknown = {} for i in set(unknown_chr): count_unknown[i] = unknown_chr.count(i) i = 0 tot_genes = 0 tot_cis = 0 tot_trans = 0 tot_unknown = 0 tot_all = 0 chr_summary_v2 = open(options.output5,'w') chr_summary = open(options.input2,'r') for line in chr_summary: i += 1 if i == 1: chr_summary_v2.write(line.strip()+"\tgenes\tcis eQTL\ttrans eQT\tunknown eQTL\tall eQTL\n") else: l = line.strip().split("\t") if l[0] != "Total": try: count_unknown_ans = count_unknown[l[0]] tot_unknown += count_unknown[l[0]] except: count_unknown_ans = 0 tot_unknown = 0 chr_summary_v2.write(line.strip()+"\t"+str(count_genes[l[0]])+"\t"+str(count_cis[l[0]])+"\t"+str(count_trans[l[0]])+"\t"+str(count_unknown_ans)+"\t"+str(count_all[l[0]])+"\n") tot_genes += count_genes[l[0]] tot_cis += count_cis[l[0]] tot_trans += count_trans[l[0]] tot_all += count_all[l[0]] else: chr_summary_v2.write(line.strip()+"\t"+str(tot_genes)+"\t"+str(tot_cis)+"\t"+str(tot_trans)+"\t"+str(tot_unknown)+"\t"+str(tot_all)+"\n") chr_summary_v2.close() chr_summary.close() gene_pos_v2 = open(options.output6,'w') gene_pos_v2.write("gene\tchr\tstart_bp\tend_bp\tnum_eQTL\tnum_cis_eQTL\tnum_trans_eQTL\tnum_unknown_eQTL\tgene_bin\n") eqtl_count = 0 genes_with_eqtl = 0 cis_count = 0 genes_with_cis = 0 trans_count = 0 genes_with_trans = 0 only_cis = 0 only_trans = 0 cis_and_trans = 0 cis_or_trans = 0 for g in gene_dict.keys(): gene_pos_v2.write(g+"\t"+str(gene_dict[g][0])+"\t"+str(gene_dict[g][1])+"\t"+str(gene_dict[g][2])+"\t"+str(gene_dict[g][3])+"\t"+str(gene_dict[g][4])+"\t"+str(gene_dict[g][5])+"\t"+str(gene_dict[g][6])+"\t"+str(gene_bin_dict[g])+"\n") # num all eQTL if int(gene_dict[g][3]) != 0: genes_with_eqtl += 1 eqtl_count += int(gene_dict[g][3]) # num cis eQTL if int(gene_dict[g][4]) != 0: genes_with_cis += 1 cis_count += int(gene_dict[g][4]) # num trans eQTL if int(gene_dict[g][5]) != 0: genes_with_trans += 1 trans_count += int(gene_dict[g][5]) if (int(gene_dict[g][4]) != 0) and (int(gene_dict[g][5]) == 0): only_cis += 1 if (int(gene_dict[g][5]) != 0) and (int(gene_dict[g][4]) == 0): only_trans += 1 if (int(gene_dict[g][5]) != 0) and (int(gene_dict[g][4]) != 0): cis_and_trans += 1 if (int(gene_dict[g][5]) != 0) or (int(gene_dict[g][4]) != 0): cis_or_trans += 1 ave_num_eqtl_per_gene = eqtl_count/float(genes_with_eqtl) ave_num_cis_eqtl_per_gene = cis_count/float(genes_with_cis) ave_num_trans_eqtl_per_gene = trans_count/float(genes_with_trans) #print "Average number of eQTLs per gene with eQTL\t"+str(round(ave_num_eqtl_per_gene,1)) #print "Average number of cis eQTLs per gene with cis eQTL\t"+str(round(ave_num_cis_eqtl_per_gene,1)) #print "Average number of trans eQTLs per gene with trans eQTL\t"+str(round(ave_num_trans_eqtl_per_gene,1)) #print "Number of genes with only cis eQTL (no trans)\t"+str(only_cis) #print "Number of genes with only trans eQTL (no cis)\t"+str(only_trans) #print "Number of genes with cis and trans eQTL\t"+str(cis_and_trans) #print "Number of genes with cis or trans eQTL\t"+str(cis_or_trans) eqtl_per_gene = open(options.output7,'w') eqtl_per_gene.write("Average number of eQTLs per gene with eQTL\t"+str(round(ave_num_eqtl_per_gene,1))+"\n") eqtl_per_gene.write("Average number of cis eQTLs per gene with cis eQTL\t"+str(round(ave_num_cis_eqtl_per_gene,1))+"\n") eqtl_per_gene.write("Average number of trans eQTLs per gene with trans eQTL\t"+str(round(ave_num_trans_eqtl_per_gene,1))+"\n") eqtl_per_gene.write("Number of genes with only cis eQTL (no trans)\t"+str(only_cis)+" ("+str(round((int(only_cis)/float(cis_or_trans))*100,1))+"%)\n") eqtl_per_gene.write("Number of genes with only trans eQTL (no cis)\t"+str(only_trans)+" ("+str(round((int(only_trans)/float(cis_or_trans))*100,1))+"%)\n") eqtl_per_gene.write("Number of genes with cis and trans eQTL\t"+str(cis_and_trans)+" ("+str(round((int(cis_and_trans)/float(cis_or_trans))*100,1))+"%)\n") eqtl_per_gene.write("Number of genes with cis or trans eQTL\t"+str(cis_or_trans)+" ("+str(round((int(cis_and_trans)/float(cis_and_trans))*100,1))+"%)\n") eqtl_per_gene.close() gene_pos_v2.close() # Create temp direcotry tempdir = tempfile.mkdtemp() # copy INPUT file to the temp directory s = "cp %s %s/all_classification.txt" %(options.output1, tempdir) subprocess.call(s, shell=True) s = "cp %s %s/lookup.txt" %(options.input4, tempdir) subprocess.call(s, shell=True) # create R script => save in temp directory # generate new header new_script = open(tempdir+"/new_script.r","w") header = "setwd(\"%s\")" %tempdir new_script.write(header+"\n") # add script body script = open(options.rscript,"r") for line in script: new_script.write(line.strip()+"\n") new_script.close() # run R script from temp directory s = "R CMD BATCH %s/new_script.r out.txt" %tempdir subprocess.call(s, shell=True) # run R script from temp directory s = "R CMD BATCH %s/new_script.r out.txt" %tempdir subprocess.call(s, shell=True) os.system("mv %s/Rplot_eQTL_genes_positions.pdf %s" %(tempdir,options.output8)) if __name__=="__main__": __main__()