comparison summary_to_fasta.py @ 4:5ffd52fc35c4 draft

Uploaded
author davidvanzessen
date Mon, 12 Dec 2016 05:22:37 -0500
parents
children
comparison
equal deleted inserted replaced
3:beaa487ecf43 4:5ffd52fc35c4
import argparse

# Data rows with fewer tab-separated fields than this are counted as
# "no results" rows and skipped.
MIN_RESULT_FIELDS = 5


def summary_to_fasta(infile, fasta):
    """Convert a tab-separated summary file into a FASTA file.

    The first line of *infile* is treated as the header and must contain the
    columns "Sequence ID" and "Sequence". Every following row that has a
    non-empty sequence is written to *fasta* as ``>ID`` followed by the
    sequence on the next line.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to write (overwritten if it exists).

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple: the number of rows skipped
        for having too few fields, the number of rows skipped for having no
        sequence, and the number of records written.

    Raises:
        ValueError: If the header lacks a "Sequence ID" or "Sequence" column.
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as src, open(fasta, 'w') as dst:
        header = next(src, None)
        if header is None:
            # Empty input: nothing to convert, leave an empty FASTA file.
            return no_results, no_seqs, passed

        # Strip the line terminator before splitting so the last column
        # name is not stored as e.g. "Sequence\n".
        columns = header.rstrip("\r\n").split("\t")
        try:
            id_col = columns.index("Sequence ID")
            seq_col = columns.index("Sequence")
        except ValueError:
            raise ValueError(
                "%s: header must contain the columns 'Sequence ID' and "
                "'Sequence'" % infile
            )

        for line in src:
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) < MIN_RESULT_FIELDS or len(fields) <= id_col:
                no_results += 1
                continue

            # A row may be long enough to count as a result and still stop
            # before the sequence column; treat that as a missing sequence
            # instead of failing with an IndexError.
            seq = fields[seq_col] if seq_col < len(fields) else ""
            if not seq:
                no_seqs += 1
                continue

            dst.write(">" + fields[id_col] + "\n" + seq + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main(argv=None):
    """Parse the command line, run the conversion and print the row counts.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", required=True,
                        help="The output fasta file")
    args = parser.parse_args(argv)

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    # Single-argument print calls produce the same text on Python 2 and 3.
    print("No results: %d" % no_results)
    print("No sequences: %d" % no_seqs)
    print("Written to fasta file: %d" % passed)


if __name__ == "__main__":
    main()