annotate trim.py @ 3:423d320bc1ba draft default tip

Uploaded
author davidvanzessen
date Mon, 13 Mar 2017 05:52:29 -0400
parents cb08a27e5fc2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
1 import argparse
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
2
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
# Command-line interface; see docs.python.org/dev/library/argparse.html
parser = argparse.ArgumentParser()
# Both paths are used unconditionally below, so let argparse report a usage
# error when one is missing instead of failing later with a traceback.
parser.add_argument("--input", help="Input fasta", required=True)
parser.add_argument("--output", help="Output fasta", required=True)
# default=0: leaving an option out means "trim nothing" on that side.
# Without a default the value would be None and could not be compared
# or converted to an integer.
parser.add_argument("--start", help="How many nucleotides to trim from the start", type=int, default=0)
parser.add_argument("--end", help="How many nucleotides to trim from the end", type=int, default=0)

args = parser.parse_args()
# type=int already converted the values; no further cast is needed.
start = args.start
end = args.end

# Echo the effective settings to stdout, one value per line.
print(args.input)
print(args.output)
print(start)
print(end)

# Nothing to trim on either side: copy the input verbatim and stop here.
if end <= 0 and start <= 0:
    import shutil
    shutil.copy(args.input, args.output)
    import sys
    sys.exit()
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
24
3
423d320bc1ba Uploaded
davidvanzessen
parents: 0
diff changeset
def trim(string, s, e):
    """Return *string* without its first *s* and last *e* characters.

    Args:
        string: The sequence to trim.
        s: Number of characters to drop from the start. Values <= 0 drop
            nothing.
        e: Number of characters to drop from the end. Values <= 0 drop
            nothing.

    Returns:
        The trimmed sequence; an empty string when *s* and *e* together
        cover the whole input.
    """
    # A negative start would otherwise be read as "keep only the tail".
    s = max(s, 0)
    # string[s:-0] is string[s:0] (always empty) and a negative e would turn
    # -e into a positive stop index, so both cases must skip the end slice.
    if e <= 0:
        return string[s:]
    return string[s:-e]
0
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
30
cb08a27e5fc2 Uploaded
davidvanzessen
parents:
diff changeset
def _write_record(out, header, sequence):
    """Trim *sequence* and write the record to *out*.

    The record is skipped when there is no header yet or when nothing is
    left of the sequence after trimming.
    """
    trimmed = trim(sequence, start, end)
    if len(header) > 0 and len(trimmed) > 0:
        out.write(header)
        out.write(trimmed + "\n")


# Read the input line by line: a line starting with ">" opens a new record,
# every other line is a piece of the current record's sequence.
currentSeq = ""
currentId = ""
with open(args.input, 'r') as i, open(args.output, 'w') as o:
    seqParts = []
    for line in i:
        # Echo the header of the record being accumulated (one stdout line
        # per input line); currentId keeps its own trailing newline.
        print("ID: " + currentId)
        if line[0] == ">":
            # A new header closes the previous record: emit it first.
            _write_record(o, currentId, "".join(seqParts))
            currentId = line
            seqParts = []
        else:
            seqParts.append(line.rstrip())
    # The last record has no following header to trigger its output, so it
    # is emitted here, through the same trimming and empty-record filter as
    # every other record.
    currentSeq = "".join(seqParts)
    _write_record(o, currentId, currentSeq)