comparison tools/protein_analysis/wolf_psort.py @ 19:f3ecd80850e2 draft

v0.2.9 Python style improvements
author peterjc
date Wed, 01 Feb 2017 09:46:42 -0500
parents eb6ac44d4b8e
children a19b3ded8f33
comparison
equal deleted inserted replaced
18:eb6ac44d4b8e 19:f3ecd80850e2
33 normally use Python's multiprocessing library in this situation but it requires 33 normally use Python's multiprocessing library in this situation but it requires
34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. 34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4.
35 """ 35 """
36 import sys 36 import sys
37 import os 37 import os
38 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count 38 from seq_analysis_utils import split_fasta, run_jobs, thread_count
39 39
40 FASTA_CHUNK = 500 40 FASTA_CHUNK = 500
41 exe = "runWolfPsortSummary" 41 exe = "runWolfPsortSummary"
42 42
43 """ 43 """
54 os.chdir("/opt/WoLFPSORT_package_v0.2/bin") 54 os.chdir("/opt/WoLFPSORT_package_v0.2/bin")
55 args = ["./runWolfPsortSummary"] + sys.argv[1:] 55 args = ["./runWolfPsortSummary"] + sys.argv[1:]
56 return_code = subprocess.call(args) 56 return_code = subprocess.call(args)
57 os.chdir(saved_dir) 57 os.chdir(saved_dir)
58 sys.exit(return_code) 58 sys.exit(return_code)
59
60 For more details on this workaround, see:
61 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html
59 """ 62 """
60 63
61 if len(sys.argv) != 5: 64 if len(sys.argv) != 5:
62 sys_exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") 65 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file")
63 66
64 organism = sys.argv[1] 67 organism = sys.argv[1]
65 if organism not in ["animal", "plant", "fungi"]: 68 if organism not in ["animal", "plant", "fungi"]:
66 sys_exit("Organism argument %s is not one of animal, plant, fungi" % organism) 69 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism)
67 70
68 num_threads = thread_count(sys.argv[2], default=4) 71 num_threads = thread_count(sys.argv[2], default=4)
69 fasta_file = sys.argv[3] 72 fasta_file = sys.argv[3]
70 tabular_file = sys.argv[4] 73 tabular_file = sys.argv[4]
74
71 75
72 def clean_tabular(raw_handle, out_handle): 76 def clean_tabular(raw_handle, out_handle):
73 """Clean up WoLF PSORT output to make it tabular.""" 77 """Clean up WoLF PSORT output to make it tabular."""
74 for line in raw_handle: 78 for line in raw_handle:
75 if not line or line.startswith("#"): 79 if not line or line.startswith("#"):
76 continue 80 continue
77 name, data = line.rstrip("\r\n").split(None,1) 81 name, data = line.rstrip("\r\n").split(None, 1)
78 for rank, comp_data in enumerate(data.split(",")): 82 for rank, comp_data in enumerate(data.split(",")):
79 comp, score = comp_data.split() 83 comp, score = comp_data.split()
80 out_handle.write("%s\t%s\t%s\t%i\n" \ 84 out_handle.write("%s\t%s\t%s\t%i\n"
81 % (name, comp, score, rank+1)) 85 % (name, comp, score, rank + 1))
82 86
83 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) 87 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK)
84 temp_files = [f+".out" for f in fasta_files] 88 temp_files = [f + ".out" for f in fasta_files]
85 assert len(fasta_files) == len(temp_files) 89 assert len(fasta_files) == len(temp_files)
86 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) 90 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp)
87 for (fasta, temp) in zip(fasta_files, temp_files)] 91 for (fasta, temp) in zip(fasta_files, temp_files)]
88 assert len(fasta_files) == len(temp_files) == len(jobs) 92 assert len(fasta_files) == len(temp_files) == len(jobs)
93
89 94
90 def clean_up(file_list): 95 def clean_up(file_list):
91 for f in file_list: 96 for f in file_list:
92 if os.path.isfile(f): 97 if os.path.isfile(f):
93 os.remove(f) 98 os.remove(f)
94 99
95 if len(jobs) > 1 and num_threads > 1: 100 if len(jobs) > 1 and num_threads > 1:
96 #A small "info" message for Galaxy to show the user. 101 # A small "info" message for Galaxy to show the user.
97 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) 102 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))
98 results = run_jobs(jobs, num_threads) 103 results = run_jobs(jobs, num_threads)
99 assert len(fasta_files) == len(temp_files) == len(jobs) 104 assert len(fasta_files) == len(temp_files) == len(jobs)
100 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): 105 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
101 error_level = results[cmd] 106 error_level = results[cmd]
104 except IOError: 109 except IOError:
105 output = "" 110 output = ""
106 if error_level or output.lower().startswith("error running"): 111 if error_level or output.lower().startswith("error running"):
107 clean_up(fasta_files) 112 clean_up(fasta_files)
108 clean_up(temp_files) 113 clean_up(temp_files)
109 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 114 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
110 error_level) 115 error_level)
111 del results 116 del results
112 117
113 out_handle = open(tabular_file, "w") 118 out_handle = open(tabular_file, "w")
114 out_handle.write("#ID\tCompartment\tScore\tRank\n") 119 out_handle.write("#ID\tCompartment\tScore\tRank\n")