Mercurial > repos > peterjc > tmhmm_and_signalp
comparison tools/protein_analysis/wolf_psort.py @ 21:238eae32483c draft
"Check this is up to date with all 2020 changes (black etc)"
author | peterjc |
---|---|
date | Thu, 17 Jun 2021 08:21:06 +0000 |
parents | a19b3ded8f33 |
children | e1996f0f4e85 |
comparison
equal
deleted
inserted
replaced
20:a19b3ded8f33 | 21:238eae32483c |
---|---|
67 | 67 |
68 if "-v" in sys.argv or "--version" in sys.argv: | 68 if "-v" in sys.argv or "--version" in sys.argv: |
69 sys.exit("WoLF-PSORT wrapper version 0.0.11") | 69 sys.exit("WoLF-PSORT wrapper version 0.0.11") |
70 | 70 |
71 if len(sys.argv) != 5: | 71 if len(sys.argv) != 5: |
72 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") | 72 sys.exit( |
73 "Require four arguments, organism, threads, input protein FASTA file, " | |
74 "and output tabular file" | |
75 ) | |
73 | 76 |
74 organism = sys.argv[1] | 77 organism = sys.argv[1] |
75 if organism not in ["animal", "plant", "fungi"]: | 78 if organism not in ["animal", "plant", "fungi"]: |
76 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism) | 79 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism) |
77 | 80 |
86 if not line or line.startswith("#"): | 89 if not line or line.startswith("#"): |
87 continue | 90 continue |
88 name, data = line.rstrip("\r\n").split(None, 1) | 91 name, data = line.rstrip("\r\n").split(None, 1) |
89 for rank, comp_data in enumerate(data.split(",")): | 92 for rank, comp_data in enumerate(data.split(",")): |
90 comp, score = comp_data.split() | 93 comp, score = comp_data.split() |
91 out_handle.write("%s\t%s\t%s\t%i\n" | 94 out_handle.write("%s\t%s\t%s\t%i\n" % (name, comp, score, rank + 1)) |
92 % (name, comp, score, rank + 1)) | |
93 | 95 |
94 | 96 |
95 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) | 97 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) |
96 temp_files = [f + ".out" for f in fasta_files] | 98 temp_files = [f + ".out" for f in fasta_files] |
97 assert len(fasta_files) == len(temp_files) | 99 assert len(fasta_files) == len(temp_files) |
98 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) | 100 jobs = [ |
99 for (fasta, temp) in zip(fasta_files, temp_files)] | 101 "%s %s < %s > %s" % (exe, organism, fasta, temp) |
102 for (fasta, temp) in zip(fasta_files, temp_files) | |
103 ] | |
100 assert len(fasta_files) == len(temp_files) == len(jobs) | 104 assert len(fasta_files) == len(temp_files) == len(jobs) |
101 | 105 |
102 | 106 |
103 def clean_up(file_list): | 107 def clean_up(file_list): |
104 for f in file_list: | 108 for f in file_list: |
118 except IOError: | 122 except IOError: |
119 output = "" | 123 output = "" |
120 if error_level or output.lower().startswith("error running"): | 124 if error_level or output.lower().startswith("error running"): |
121 clean_up(fasta_files) | 125 clean_up(fasta_files) |
122 clean_up(temp_files) | 126 clean_up(temp_files) |
123 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), | 127 sys.exit( |
124 error_level) | 128 "One or more tasks failed, e.g. %i from %r gave:\n%s" |
129 % (error_level, cmd, output), | |
130 error_level, | |
131 ) | |
125 del results | 132 del results |
126 | 133 |
127 out_handle = open(tabular_file, "w") | 134 out_handle = open(tabular_file, "w") |
128 out_handle.write("#ID\tCompartment\tScore\tRank\n") | 135 out_handle.write("#ID\tCompartment\tScore\tRank\n") |
129 for temp in temp_files: | 136 for temp in temp_files: |