Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/wolf_psort.py @ 20:a19b3ded8f33 draft
v0.2.11 Job splitting fast-fail; RXLR tools supports HMMER2 from BioConda; Capture more version information; misc internal changes
author | peterjc |
---|---|
date | Thu, 21 Sep 2017 11:35:20 -0400 |
parents | f3ecd80850e2 |
children | 238eae32483c |
comparison (legend: equal | deleted | inserted | replaced)
19:f3ecd80850e2 | 20:a19b3ded8f33 |
---|---|
31 Additionally in order to take full advantage of multiple cores, by subdividing | 31 Additionally in order to take full advantage of multiple cores, by subdividing |
32 the input FASTA file multiple copies of WoLF PSORT are run in parallel. I would | 32 the input FASTA file multiple copies of WoLF PSORT are run in parallel. I would |
33 normally use Python's multiprocessing library in this situation but it requires | 33 normally use Python's multiprocessing library in this situation but it requires |
34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. | 34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. |
35 """ | 35 """ |
36 | |
37 from __future__ import print_function | |
38 | |
39 import os | |
36 import sys | 40 import sys |
37 import os | 41 |
38 from seq_analysis_utils import split_fasta, run_jobs, thread_count | 42 from seq_analysis_utils import run_jobs, split_fasta, thread_count |
39 | 43 |
40 FASTA_CHUNK = 500 | 44 FASTA_CHUNK = 500 |
41 exe = "runWolfPsortSummary" | 45 exe = "runWolfPsortSummary" |
42 | 46 |
43 """ | 47 """ |
59 | 63 |
60 For more details on this workaround, see: | 64 For more details on this workaround, see: |
61 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html | 65 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html |
62 """ | 66 """ |
63 | 67 |
68 if "-v" in sys.argv or "--version" in sys.argv: | |
69 sys.exit("WoLF-PSORT wrapper version 0.0.11") | |
70 | |
64 if len(sys.argv) != 5: | 71 if len(sys.argv) != 5: |
65 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") | 72 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") |
66 | 73 |
67 organism = sys.argv[1] | 74 organism = sys.argv[1] |
68 if organism not in ["animal", "plant", "fungi"]: | 75 if organism not in ["animal", "plant", "fungi"]: |
82 for rank, comp_data in enumerate(data.split(",")): | 89 for rank, comp_data in enumerate(data.split(",")): |
83 comp, score = comp_data.split() | 90 comp, score = comp_data.split() |
84 out_handle.write("%s\t%s\t%s\t%i\n" | 91 out_handle.write("%s\t%s\t%s\t%i\n" |
85 % (name, comp, score, rank + 1)) | 92 % (name, comp, score, rank + 1)) |
86 | 93 |
94 | |
87 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) | 95 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) |
88 temp_files = [f + ".out" for f in fasta_files] | 96 temp_files = [f + ".out" for f in fasta_files] |
89 assert len(fasta_files) == len(temp_files) | 97 assert len(fasta_files) == len(temp_files) |
90 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) | 98 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) |
91 for (fasta, temp) in zip(fasta_files, temp_files)] | 99 for (fasta, temp) in zip(fasta_files, temp_files)] |
95 def clean_up(file_list): | 103 def clean_up(file_list): |
96 for f in file_list: | 104 for f in file_list: |
97 if os.path.isfile(f): | 105 if os.path.isfile(f): |
98 os.remove(f) | 106 os.remove(f) |
99 | 107 |
108 | |
100 if len(jobs) > 1 and num_threads > 1: | 109 if len(jobs) > 1 and num_threads > 1: |
101 # A small "info" message for Galaxy to show the user. | 110 # A small "info" message for Galaxy to show the user. |
102 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) | 111 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs))) |
103 results = run_jobs(jobs, num_threads) | 112 results = run_jobs(jobs, num_threads) |
104 assert len(fasta_files) == len(temp_files) == len(jobs) | 113 assert len(fasta_files) == len(temp_files) == len(jobs) |
105 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): | 114 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): |
106 error_level = results[cmd] | 115 error_level = results[cmd] |
107 try: | 116 try: |