annotate make_unique_id.py @ 5:85167b9e6c7a draft default tip

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 669399887aebfe546eae29022f446664d9b52d44"
author brinkmanlab
date Sat, 05 Sep 2020 04:20:01 +0000
parents f2656e644641
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
1 #!/usr/bin/env python
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
2 import sys
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
3 from Bio import SeqIO
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
4 from collections import defaultdict
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
5
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
# Command-line help text; the script prints it to stderr when it is invoked
# with too few arguments.
usage = """
make_unique_id
Makes all record ids unique across all input data.
All input data must be the same format.

Use: make_unique_id.py [-v] <format> <input1> <output1> [<input2> <output2> ... <inputn> <outputn>]
\t-v Print version and exit

Valid formats: clustal, embl, fasta, fasta-2line, fastq-sanger, fastq, fastq-solexa, fastq-illumina, genbank, gb, imgt,
nexus, phd, phylip, pir, seqxml, sff, stockholm, tab, qual
"""
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
17
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
def _reserve_unique_id(record_id, ids):
    """Return an identifier based on *record_id* that has not been used yet.

    ``ids`` maps every identifier already handed out to the number of times
    it has been requested, and is updated in place.  The first request for an
    identifier returns it unchanged; later requests return
    ``<record_id>_<n>`` using the smallest ``n`` (starting from the request
    count) that is not already present in ``ids``.
    """
    count = ids.get(record_id, 0)
    ids[record_id] = count + 1
    if not count:
        return record_id

    candidate = f"{record_id}_{count}"
    # A suffixed id may already exist, either as a genuine input id or as one
    # generated earlier; keep counting until the candidate is really free.
    while candidate in ids:
        count += 1
        candidate = f"{record_id}_{count}"

    # Remember how far we counted so the next duplicate starts after it, and
    # reserve the generated id so a later record with that id gets renamed.
    ids[record_id] = count + 1
    ids[candidate] = 1
    return candidate


def makeUnique(seq, ids):
    """Give *seq* an id that is unique among all records seen through *ids*.

    When the record's id was already used, a ``_<n>`` suffix is appended to
    both ``seq.id`` and ``seq.name`` and the ``old<TAB>new`` id mapping is
    printed to stdout.  The record is modified in place and returned.

    :param seq: record object exposing string ``id`` and ``name`` attributes
    :param ids: dict of identifier -> request count, shared across all inputs
    :return: the same ``seq`` object
    """
    oldid = seq.id
    newid = _reserve_unique_id(oldid, ids)
    if newid != oldid:
        suffix = newid[len(oldid):]
        seq.id = newid
        seq.name += suffix
        print(f"{oldid}\t{seq.id}")

    return seq


def main(argv):
    """Run the command-line tool and return the process exit status.

    :param argv: full argument vector, including the program name at index 0
    :return: 0 on success, 1 when the arguments are incomplete
    """
    if '-v' in argv:
        print('1.0')
        return 0

    if len(argv) < 4:
        print("Missing arguments", file=sys.stderr)
        print(usage, file=sys.stderr)
        return 1

    seq_format = argv[1]
    path_args = argv[2:]

    # Paths are consumed as <input> <output> pairs.  Refuse an unpaired
    # trailing input up front instead of silently skipping it.
    if len(path_args) % 2:
        print(f"Missing output path for input {path_args[-1]}", file=sys.stderr)
        return 1

    # One registry for the whole run so ids are unique across every input.
    ids = defaultdict(int)
    paths = iter(path_args)

    # zip() over the same iterator twice yields consecutive (input, output) pairs.
    for in_path, out_path in zip(paths, paths):
        SeqIO.write(
            (makeUnique(record, ids) for record in SeqIO.parse(in_path, seq_format)),
            out_path,
            seq_format
        )

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
a3a09dd8d09a "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
brinkmanlab
parents:
diff changeset
52
3
a2258ce2d58c "planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 08b7b36e1eba516e4d7eb97086baf7ec8d177c5d"
brinkmanlab
parents: 2
diff changeset
53