Mercurial > repos > davidvanzessen > shm_csr
annotate summary_to_fasta.py @ 92:cf8ad181628f draft
planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
author | rhpvorderman |
---|---|
date | Mon, 12 Dec 2022 12:32:44 +0000 |
parents | 729738462297 |
children |
rev | line source |
---|---|
"""Convert the 1_Summary file of an IMGT zip file into a FASTA file."""
import argparse


def summary_to_fasta(infile, fasta):
    """Write every row of *infile* that has a sequence to *fasta*.

    *infile* is read as a tab-separated table whose first line is a header
    containing the columns "Sequence ID" and "Sequence".  Each following
    row is handled as follows:

    * rows with fewer than five fields, or too short to contain both
      looked-up columns, are counted as "no results" and skipped;
    * rows whose "Sequence" field is empty are counted as "no sequences"
      and skipped;
    * every other row is written as a two-line FASTA record
      (">" + ID, then the sequence).

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to create (overwritten if present).

    Returns:
        A tuple ``(no_results, no_seqs, passed)`` with the number of rows
        in each of the three categories above.

    Raises:
        ValueError: If the header lacks "Sequence ID" or "Sequence".
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as summary, open(fasta, 'w') as out:
        rows = iter(summary)
        header = next(rows, None)
        if header is None:
            # Empty input: nothing to convert, the output file stays empty.
            return no_results, no_seqs, passed

        # Strip the line terminator before splitting; otherwise a header
        # name in the last column would read "Sequence\n" and not be found.
        columns = header.rstrip("\r\n").split("\t")
        try:
            id_col = columns.index("Sequence ID")
            seq_col = columns.index("Sequence")
        except ValueError:
            raise ValueError(
                "Header of " + repr(infile) + " must contain the columns "
                "'Sequence ID' and 'Sequence'"
            )

        # Five is the original threshold for a row that carries results;
        # also require the row to reach both columns so that a truncated
        # row is counted instead of raising IndexError.
        min_fields = max(5, id_col + 1, seq_col + 1)

        for line in rows:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < min_fields:
                no_results += 1
                continue

            sequence = fields[seq_col]
            if not sequence:
                no_seqs += 1
                continue

            out.write(">" + fields[id_col] + "\n" + sequence + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main(argv=None):
    """Parse the command line, run the conversion and print the counts.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them open() would fail on None.
    parser.add_argument("--input", required=True,
                        help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", required=True,
                        help="The output fasta file")
    args = parser.parse_args(argv)

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    print("No results:", no_results)
    print("No sequences:", no_seqs)
    print("Written to fasta file:", passed)


if __name__ == "__main__":
    main()