comparison tools/protein_analysis/wolf_psort.py @ 21:238eae32483c draft

"Check this is up to date with all 2020 changes (black etc)"
author peterjc
date Thu, 17 Jun 2021 08:21:06 +0000
parents a19b3ded8f33
children e1996f0f4e85
comparison
equal deleted inserted replaced
20:a19b3ded8f33 21:238eae32483c
67 67
68 if "-v" in sys.argv or "--version" in sys.argv: 68 if "-v" in sys.argv or "--version" in sys.argv:
69 sys.exit("WoLF-PSORT wrapper version 0.0.11") 69 sys.exit("WoLF-PSORT wrapper version 0.0.11")
70 70
71 if len(sys.argv) != 5: 71 if len(sys.argv) != 5:
72 sys.exit("Require four arguments, organism, threads, input protein FASTA file & output tabular file") 72 sys.exit(
73 "Require four arguments, organism, threads, input protein FASTA file, "
74 "and output tabular file"
75 )
73 76
74 organism = sys.argv[1] 77 organism = sys.argv[1]
75 if organism not in ["animal", "plant", "fungi"]: 78 if organism not in ["animal", "plant", "fungi"]:
76 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism) 79 sys.exit("Organism argument %s is not one of animal, plant, fungi" % organism)
77 80
86 if not line or line.startswith("#"): 89 if not line or line.startswith("#"):
87 continue 90 continue
88 name, data = line.rstrip("\r\n").split(None, 1) 91 name, data = line.rstrip("\r\n").split(None, 1)
89 for rank, comp_data in enumerate(data.split(",")): 92 for rank, comp_data in enumerate(data.split(",")):
90 comp, score = comp_data.split() 93 comp, score = comp_data.split()
91 out_handle.write("%s\t%s\t%s\t%i\n" 94 out_handle.write("%s\t%s\t%s\t%i\n" % (name, comp, score, rank + 1))
92 % (name, comp, score, rank + 1))
93 95
94 96
95 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK) 97 fasta_files = split_fasta(fasta_file, tabular_file, n=FASTA_CHUNK)
96 temp_files = [f + ".out" for f in fasta_files] 98 temp_files = [f + ".out" for f in fasta_files]
97 assert len(fasta_files) == len(temp_files) 99 assert len(fasta_files) == len(temp_files)
98 jobs = ["%s %s < %s > %s" % (exe, organism, fasta, temp) 100 jobs = [
99 for (fasta, temp) in zip(fasta_files, temp_files)] 101 "%s %s < %s > %s" % (exe, organism, fasta, temp)
102 for (fasta, temp) in zip(fasta_files, temp_files)
103 ]
100 assert len(fasta_files) == len(temp_files) == len(jobs) 104 assert len(fasta_files) == len(temp_files) == len(jobs)
101 105
102 106
103 def clean_up(file_list): 107 def clean_up(file_list):
104 for f in file_list: 108 for f in file_list:
118 except IOError: 122 except IOError:
119 output = "" 123 output = ""
120 if error_level or output.lower().startswith("error running"): 124 if error_level or output.lower().startswith("error running"):
121 clean_up(fasta_files) 125 clean_up(fasta_files)
122 clean_up(temp_files) 126 clean_up(temp_files)
123 sys.exit("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output), 127 sys.exit(
124 error_level) 128 "One or more tasks failed, e.g. %i from %r gave:\n%s"
129 % (error_level, cmd, output),
130 error_level,
131 )
125 del results 132 del results
126 133
127 out_handle = open(tabular_file, "w") 134 out_handle = open(tabular_file, "w")
128 out_handle.write("#ID\tCompartment\tScore\tRank\n") 135 out_handle.write("#ID\tCompartment\tScore\tRank\n")
129 for temp in temp_files: 136 for temp in temp_files: