Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/wolf_psort.py @ 19:f3ecd80850e2 draft
v0.2.9 Python style improvements
author | peterjc |
---|---|
date | Wed, 01 Feb 2017 09:46:42 -0500 |
parents | eb6ac44d4b8e |
children | a19b3ded8f33 |
Comparison legend: equal | deleted | inserted | replaced
18:eb6ac44d4b8e | 19:f3ecd80850e2 |
---|---|
33 normally use Python's multiprocessing library in this situation but it requires | 33 normally use Python's multiprocessing library in this situation but it requires |
34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. | 34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. |
35 """ | 35 """ |
36 import sys | 36 import sys |
37 import os | 37 import os |
38 from seq_analysis_utils import sys_exit, split_fasta, run_jobs, thread_count | 38 from seq_analysis_utils import split_fasta, run_jobs, thread_count |
39 | 39 |
40 FASTA_CHUNK = 500 | 40 FASTA_CHUNK = 500 |
41 exe = "runWolfPsortSummary" | 41 exe = "runWolfPsortSummary" |
42 | 42 |
43 """ | 43 """ |
54 os.chdir("/opt/WoLFPSORT_package_v0.2/bin") | 54 os.chdir("/opt/WoLFPSORT_package_v0.2/bin") |
55 args = ["./runWolfPsortSummary"] + sys.argv[1:] | 55 args = ["./runWolfPsortSummary"] + sys.argv[1:] |
56 return_code = subprocess.call(args) | 56 return_code = subprocess.call(args) |
57 os.chdir(saved_dir) | 57 os.chdir(saved_dir) |
58 sys.exit(return_code) | 58 sys.exit(return_code) |
59 | |
60 For more details on this workaround, see: | |
61 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html | |
59 """ | 62 """ |
60 | 63 |
61 if len(sys.argv) != 5: | 64 if len(sys.argv) != 5: |
62 sys_exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") | 65 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") |
63 | 66 |
64 organism = sys.argv[1] | 67 organism = sys.argv[1] |
65 if organism not in ["animal", "plant", "fungi"]: | 68 if organism not in ["animal", "plant", "fungi"]: |
66 sys_exit("Organism argument %s is not one of animal, plant, fungi" % organism) | 69 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism) |
67 | 70 |
68 num_threads = thread_count(sys.argv[2], default=4) | 71 num_threads = thread_count(sys.argv[2], default=4) |
69 fasta_file = sys.argv[3] | 72 fasta_file = sys.argv[3] |
70 tabular_file = sys.argv[4] | 73 tabular_file = sys.argv[4] |
74 | |
71 | 75 |
72 def clean_tabular(raw_handle, out_handle): | 76 def clean_tabular(raw_handle, out_handle): |
73 """Clean up WoLF PSORT output to make it tabular.""" | 77 """Clean up WoLF PSORT output to make it tabular.""" |
74 for line in raw_handle: | 78 for line in raw_handle: |
75 if not line or line.startswith("#"): | 79 if not line or line.startswith("#"): |
76 continue | 80 continue |
77 name, data = line.rstrip("\r\n").split(None,1) | 81 name, data = line.rstrip("\r\n").split(None, 1) |
78 for rank, comp_data in enumerate(data.split(",")): | 82 for rank, comp_data in enumerate(data.split(",")): |
79 comp, score = comp_data.split() | 83 comp, score = comp_data.split() |
80 out_handle.write("%s\t%s\t%s\t%i\n" \ | 84 out_handle.write("%s\t%s\t%s\t%i\n" |
81 % (name, comp, score, rank+1)) | 85 % (name, comp, score, rank + 1)) |
82 | 86 |
83 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) | 87 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) |
84 temp_files = [f+".out" for f in fasta_files] | 88 temp_files = [f + ".out" for f in fasta_files] |
85 assert len(fasta_files) == len(temp_files) | 89 assert len(fasta_files) == len(temp_files) |
86 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) | 90 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) |
87 for (fasta, temp) in zip(fasta_files, temp_files)] | 91 for (fasta, temp) in zip(fasta_files, temp_files)] |
88 assert len(fasta_files) == len(temp_files) == len(jobs) | 92 assert len(fasta_files) == len(temp_files) == len(jobs) |
93 | |
89 | 94 |
90 def clean_up(file_list): | 95 def clean_up(file_list): |
91 for f in file_list: | 96 for f in file_list: |
92 if os.path.isfile(f): | 97 if os.path.isfile(f): |
93 os.remove(f) | 98 os.remove(f) |
94 | 99 |
95 if len(jobs) > 1 and num_threads > 1: | 100 if len(jobs) > 1 and num_threads > 1: |
96 #A small "info" message for Galaxy to show the user. | 101 # A small "info" message for Galaxy to show the user. |
97 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 102 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) |
98 results = run_jobs(jobs, num_threads) | 103 results = run_jobs(jobs, num_threads) |
99 assert len(fasta_files) == len(temp_files) == len(jobs) | 104 assert len(fasta_files) == len(temp_files) == len(jobs) |
100 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): | 105 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): |
101 error_level = results[cmd] | 106 error_level = results[cmd] |
104 except IOError: | 109 except IOError: |
105 output = "" | 110 output = "" |
106 if error_level or output.lower().startswith("error running"): | 111 if error_level or output.lower().startswith("error running"): |
107 clean_up(fasta_files) | 112 clean_up(fasta_files) |
108 clean_up(temp_files) | 113 clean_up(temp_files) |
109 sys_exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 114 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), |
110 error_level) | 115 error_level) |
111 del results | 116 del results |
112 | 117 |
113 out_handle = open(tabular_file, "w") | 118 out_handle = open(tabular_file, "w") |
114 out_handle.write("#ID\tCompartment\tScore\tRank\n") | 119 out_handle.write("#ID\tCompartment\tScore\tRank\n") |