annotate renameSequences2.py @ 3:e320ef2d105a draft

Uploaded
author petr-novak
date Thu, 05 Sep 2019 09:04:56 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e320ef2d105a Uploaded
petr-novak
parents:
diff changeset
#!/usr/bin/env python
"""Rename FASTA sequences to short numeric identifiers.

The FASTA input is read from the file given as the first argument (this
version does not read it from standard input).  The renamed FASTA is
written to standard output, and a tab-separated index mapping each
original header to its new name is written to the index file.

How to use:
    renameSequences.py fasta.file true index.out prefix_length   # paired sequences
    renameSequences.py fasta.file false index.out prefix_length  # not paired sequences

``prefix_length`` is optional; when it is omitted no prefix is kept.
"""

import sys


def rename_sequences(reader, out, index, paired, prefix=0):
    """Copy FASTA lines from *reader* to *out*, renaming every header.

    Arguments:
        reader: iterable of lines (e.g. an open text file).
        out: writable object receiving the renamed FASTA.
        index: writable object receiving one "old<TAB>new" line per header.
        paired: when true, headers are consumed in pairs that share one
            number; the first of each pair gets the suffix "f" and the
            second the suffix "r".  When false every header gets its own
            number and no suffix.
        prefix: number of characters following ">" in the original header
            that are kept (whitespace-stripped) in front of the number.
    """
    forward_tag = "f" if paired else ""
    reads_per_id = 2 if paired else 1
    seq_number = 0  # number shared by all mates of the current sequence
    mate = 0        # position of the current header within its group
    for oneline in reader:
        if not oneline:
            # Guard against empty strings so the [0] lookup below is safe.
            continue
        if oneline[0] != ">":
            # Sequence data (and anything else) passes through unchanged.
            out.write(oneline)
            continue
        mate += 1
        if mate == 1:
            # Only the first mate of a group advances the numbering.
            seq_number += 1
            tag = forward_tag
        else:
            tag = "r"
        new_name = oneline[1:(1 + prefix)].strip() + str(seq_number) + tag
        out.write(">" + new_name + "\n")
        index.write(oneline[1:].strip() + "\t" + new_name + "\n")
        if mate == reads_per_id:
            mate = 0


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit("usage: %s fasta.file true|false index.out [prefix_length]"
                 % argv[0])
    paired = argv[2] == "true"
    # Parse the prefix before opening any file so that a malformed value
    # cannot leave behind a truncated index file.
    prefix = int(argv[4]) if len(argv) > 4 else 0
    # The context manager closes both files even when an error occurs.
    with open(argv[1], mode='r') as reader, open(argv[3], 'w') as index:
        rename_sequences(reader, sys.stdout, index, paired, prefix)


if __name__ == "__main__":
    main()