Mercurial > repos > davidvanzessen > shm_csr
annotate summary_to_fasta.py @ 94:84e9e5c8c101 draft
"planemo upload commit d4be85014b638f1d50b318d4b735be7f6e973140"
| author | rhpvorderman |
|---|---|
| date | Fri, 24 Mar 2023 16:58:28 +0000 |
| parents | 729738462297 |
| children |
| rev | line source |
|---|---|
# Convert the "1_Summary" table of an IMGT zip file into a FASTA file.
import argparse


def summary_to_fasta(infile, fasta):
    """Write every sequence found in a tab-separated summary file as FASTA.

    The first line of *infile* is treated as the header and must contain the
    columns "Sequence ID" and "Sequence". Each following row is written to
    *fasta* as ``>ID`` followed by the sequence on the next line.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to create (overwritten if it exists).

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple: the number of rows skipped
        because they were too short to hold a result, the number of rows
        skipped because their sequence was empty, and the number of records
        written.

    Raises:
        ValueError: If the header lacks "Sequence ID" or "Sequence".
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as summary, open(fasta, 'w') as out:
        header = next(summary, None)
        if header is not None:
            # Strip the line terminator before splitting; otherwise the last
            # column name would be e.g. "Sequence\n" and never match.
            columns = header.rstrip("\r\n").split("\t")
            id_col = columns.index("Sequence ID")
            seq_col = columns.index("Sequence")
            # Rows with fewer than 5 fields carry no result. Also require the
            # row to reach both needed columns so indexing cannot fail.
            min_fields = max(5, id_col + 1, seq_col + 1)

            for line in summary:
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < min_fields:
                    no_results += 1
                    continue

                seq = fields[seq_col]
                if not seq:
                    no_seqs += 1
                    continue

                out.write(">" + fields[id_col] + "\n" + seq + "\n")
                passed += 1

    return no_results, no_seqs, passed


def main():
    """Parse the command line, run the conversion and print the counters."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", help="The output fasta file")
    args = parser.parse_args()

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    print("No results:", no_results)
    print("No sequences:", no_seqs)
    print("Written to fasta file:", passed)


if __name__ == "__main__":
    main()
