Mercurial > repos > devteam > fasta_compute_length
annotate utils/fasta_to_len.py @ 4:e12f68d2cc4e draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:24:10 -0500 |
parents | ece409f6573c |
children | 7d37cfda8e00 |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 Input: fasta, int | |
4 Output: tabular | |
5 Return titles with lengths of corresponding seq | |
6 """ | |
7 | |
4
e12f68d2cc4e
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
8 import sys |
0 | 9 |
4
e12f68d2cc4e
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
# compute_fasta_length opens its input and output in a single ``with``
# statement with two context managers, which requires Python 2.7 or newer,
# so the guard must not advertise support for older interpreters.
assert sys.version_info[:2] >= (2, 7)
0 | 11 |
12 | |
4
e12f68d2cc4e
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
def _format_fasta_record(header, seq_len, title_end, keep_first_word):
    """Return the ``title<TAB>length`` output line for one FASTA record.

    ``header`` is the stripped header line including its leading '>'.
    ``title_end`` is the exclusive slice end applied to the header (None
    keeps everything).
    """
    if keep_first_word:
        # Cut the header at the first run of whitespace.
        header = header.split()[0]
    # Index 0 is the '>' marker, so the title proper starts at index 1.
    return "%s\t%d\n" % (header[1:title_end], seq_len)


def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write one ``title<TAB>length`` line per record of a FASTA file.

    Arguments:
    fasta_file -- path of the FASTA file to read.
    out_file -- path of the tabular file to write (opened with mode 'w').
    keep_first_char -- int, or anything ``int()`` accepts: number of title
        characters to keep; 0 keeps the whole title.
    keep_first_word -- when true, the title is cut at the first whitespace
        before the character limit is applied.

    Blank lines and lines starting with '#' are skipped.  A record's length
    is the sum of the lengths of its whitespace-stripped sequence lines.
    Sequence lines that appear before the first '>' header belong to no
    record and are ignored.  Input without any header yields an empty output
    file instead of an IndexError (keep_first_word) or a record with an
    empty title.

    Raises ValueError if keep_first_char cannot be converted to an int.
    """
    keep_first_char = int(keep_first_char)
    # The title is sliced as header[1:title_end]; the +1 makes up for the
    # leading '>' that is still part of the header at slicing time.
    title_end = keep_first_char + 1 if keep_first_char else None

    header = None  # header line of the record being accumulated, if any
    seq_len = 0

    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
        for line in in_fh:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if line[0] == '>':
                # A new header terminates the previous record, if there is one.
                if header is not None:
                    out_fh.write(_format_fasta_record(header, seq_len, title_end, keep_first_word))
                header = line
                seq_len = 0
            else:
                seq_len += len(line)

        # Flush the last record; skipped when the input held no header at all.
        if header is not None:
            out_fh.write(_format_fasta_record(header, seq_len, title_end, keep_first_word))
0 | 46 |
4
e12f68d2cc4e
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
47 |
e12f68d2cc4e
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
if __name__ == "__main__":
    # Command line: <input FASTA> <output tabular> <title characters to keep>
    input_fasta = sys.argv[1]
    output_tabular = sys.argv[2]
    title_chars = sys.argv[3]
    # The script entry point always truncates titles at the first whitespace.
    compute_fasta_length(input_fasta, output_tabular, title_chars, True)