comparison tools/protein_analysis/wolf_psort.py @ 20:a19b3ded8f33 draft

v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes
author peterjc
date Thu, 21 Sep 2017 11:35:20 -0400
parents f3ecd80850e2
children 238eae32483c
comparison
equal deleted inserted replaced
19:f3ecd80850e2 20:a19b3ded8f33
31 Additionally in order to take full advantage of multiple cores, by subdividing 31 Additionally in order to take full advantage of multiple cores, by subdividing
32 the input FASTA file multiple copies of WoLF PSORT are run in parallel. I would 32 the input FASTA file multiple copies of WoLF PSORT are run in parallel. I would
33 normally use Python's multiprocessing library in this situation but it requires 33 normally use Python's multiprocessing library in this situation but it requires
34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4. 34 at least Python 2.6 and at the time of writing Galaxy still supports Python 2.4.
35 """ 35 """
36
37 from __future__ import print_function
38
39 import os
36 import sys 40 import sys
37 import os 41
38 from seq_analysis_utils import split_fasta, run_jobs, thread_count 42 from seq_analysis_utils import run_jobs, split_fasta, thread_count
39 43
40 FASTA_CHUNK = 500 44 FASTA_CHUNK = 500
41 exe = "runWolfPsortSummary" 45 exe = "runWolfPsortSummary"
42 46
43 """ 47 """
59 63
60 For more details on this workaround, see: 64 For more details on this workaround, see:
61 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html 65 https://lists.galaxyproject.org/pipermail/galaxy-dev/2015-December/023386.html
62 """ 66 """
63 67
68 if "-v" in sys.argv or "--version" in sys.argv:
69 sys.exit("WoLF-PSORT wrapper version 0.0.11")
70
64 if len(sys.argv) != 5: 71 if len(sys.argv) != 5:
65 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") 72 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file")
66 73
67 organism = sys.argv[1] 74 organism = sys.argv[1]
68 if organism not in ["animal", "plant", "fungi"]: 75 if organism not in ["animal", "plant", "fungi"]:
82 for rank, comp_data in enumerate(data.split(",")): 89 for rank, comp_data in enumerate(data.split(",")):
83 comp, score = comp_data.split() 90 comp, score = comp_data.split()
84 out_handle.write("%s\t%s\t%s\t%i\n" 91 out_handle.write("%s\t%s\t%s\t%i\n"
85 % (name, comp, score, rank + 1)) 92 % (name, comp, score, rank + 1))
86 93
94
87 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) 95 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK)
88 temp_files = [f + ".out" for f in fasta_files] 96 temp_files = [f + ".out" for f in fasta_files]
89 assert len(fasta_files) == len(temp_files) 97 assert len(fasta_files) == len(temp_files)
90 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) 98 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp)
91 for (fasta, temp) in zip(fasta_files, temp_files)] 99 for (fasta, temp) in zip(fasta_files, temp_files)]
95 def clean_up(file_list): 103 def clean_up(file_list):
96 for f in file_list: 104 for f in file_list:
97 if os.path.isfile(f): 105 if os.path.isfile(f):
98 os.remove(f) 106 os.remove(f)
99 107
108
100 if len(jobs) > 1 and num_threads > 1: 109 if len(jobs) > 1 and num_threads > 1:
101 # A small "info" message for Galaxy to show the user. 110 # A small "info" message for Galaxy to show the user.
102 print "Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)) 111 print("Using %i threads for %i tasks" % (min(num_threads, len(jobs)), len(jobs)))
103 results = run_jobs(jobs, num_threads) 112 results = run_jobs(jobs, num_threads)
104 assert len(fasta_files) == len(temp_files) == len(jobs) 113 assert len(fasta_files) == len(temp_files) == len(jobs)
105 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs): 114 for fasta, temp, cmd in zip(fasta_files, temp_files, jobs):
106 error_level = results[cmd] 115 error_level = results[cmd]
107 try: 116 try: