annotate fasta_tabular_converter.py @ 0:951cb6b3979b draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Sun, 21 Jun 2015 14:28:49 -0400
parents
children 2f7278120be9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
1 #!/usr/bin/python
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
2 #
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
3 import sys
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
4 from collections import defaultdict
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
5
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def readfasta_writetabular(fasta, tabular):
    """Collapse a FASTA file into a table of distinct sequences and counts.

    Header lines (starting with ">") are ignored; every other non-blank
    line is treated as one complete sequence.  Each distinct sequence is
    written to ``tabular`` as "sequence, tab, count", one per line, ordered
    from the most to the least frequent.

    Args:
        fasta: path of the FASTA file to read.
        tabular: path of the tab-separated file to write.
    """
    # Tally locally so repeated calls never see each other's counts.
    counts = defaultdict(int)
    with open(fasta, "r") as handle:
        for line in handle:
            if line.startswith(">"):
                continue
            # Remove only the line terminator: slicing off the last
            # character would truncate a final sequence that is not
            # followed by a newline.
            seq = line.rstrip("\r\n")
            if not seq:
                # A blank line is not a sequence; counting it would emit a
                # row with an empty first column.
                continue
            counts[seq] += 1
    with open(tabular, "w") as out:
        for seq in sorted(counts, key=counts.get, reverse=True):
            out.write("%s\t%s\n" % (seq, counts[seq]))
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
17
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
18
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def readtabular_writefasta(tabular, fasta):
    """Expand a sequence/count table into a FASTA file with one record per read.

    Each non-blank line of ``tabular`` is split on whitespace; the first
    field is the sequence and the second its integer count.  The sequence is
    written ``count`` times to ``fasta``, each record headed by a running
    number (">1", ">2", ...) that keeps increasing across sequences.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the FASTA file to write.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if the second field is not an integer.
    """
    counter = 0
    # The with-statement closes both files even when a line fails to parse.
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            seq = fields[0]
            copies = int(fields[1])
            for _ in range(copies):
                counter += 1
                out.write(">%s\n%s\n" % (counter, seq))
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
30
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def readtabular_writefastaweighted(tabular, fasta):
    """Convert a sequence/count table into a weighted FASTA file.

    Each non-blank line of ``tabular`` is split on whitespace; the first
    field is the sequence and the second its count.  One record per line is
    written to ``fasta`` with the header ">N_count", where N is the running
    record number starting at 1.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the weighted FASTA file to write.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    counter = 0
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            # split() already discards the line terminator.  Slicing the
            # last character off first would cut a digit from the count on
            # a final line that has no trailing newline.
            fields = line.split()
            if not fields:
                # Blank lines carry no record and must not consume a number.
                continue
            counter += 1
            out.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
41
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Re-collapse a weighted FASTA file, merging identical sequences.

    Headers of the input are expected to end in "_<weight>"; the integer
    after the last underscore is the weight of the sequence line(s) that
    follow.  Weights of identical sequences are summed and the result is
    written with headers ">N_total", N being the rank (starting at 1) when
    sequences are ordered from the highest to the lowest total.  The sum of
    all header weights is reported on standard output.

    Args:
        fastaweigthed_input: path of the weighted FASTA file to read.
        fastaweigthed_reparsed: path of the weighted FASTA file to write.

    Raises:
        ValueError: if a header does not end in an integer weight, or if a
            sequence line appears before any header.
    """
    # Tally locally so repeated calls never see each other's totals.
    totals = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as handle:
        for line in handle:
            # Remove only the line terminator: slicing off the last
            # character would damage a final line lacking a newline.
            text = line.rstrip("\r\n")
            if not text:
                continue
            if text.startswith(">"):
                weight = int(text[1:].split("_")[-1])
                number_reads += weight
            else:
                if weight is None:
                    raise ValueError(
                        "sequence line found before any header in %s"
                        % fastaweigthed_input
                    )
                totals[text] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        ranked = sorted(totals, key=totals.get, reverse=True)
        for rank, seq in enumerate(ranked, 1):
            out.write(">%s_%s\n%s\n" % (rank, totals[seq], seq))
    # Single-argument form prints the same text under Python 2 and 3.
    print("%s reads collapsed" % number_reads)
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
59
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into a FASTA file with one record per read.

    Headers of the input are expected to end in "_<weight>"; the integer
    after the last underscore says how many times each following sequence
    line is written to ``fasta``.  Output records are headed by a running
    number (">1", ">2", ...) that keeps increasing across sequences.

    Args:
        fastaweigthed: path of the weighted FASTA file to read.
        fasta: path of the FASTA file to write.

    Raises:
        ValueError: if a header does not end in an integer weight, or if a
            sequence line appears before any header.
    """
    counter = 0
    weight = None
    with open(fastaweigthed, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            # Remove only the line terminator: slicing off the last
            # character would truncate a final sequence lacking a newline.
            text = line.rstrip("\r\n")
            if not text:
                continue
            if text.startswith(">"):
                weight = int(text[1:].split("_")[-1])
                continue
            if weight is None:
                raise ValueError(
                    "sequence line found before any header in %s" % fastaweigthed
                )
            for _ in range(weight):
                counter += 1
                out.write(">%s\n%s\n" % (counter, text))
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
74
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
75
951cb6b3979b planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
# Module-level tally shared by the conversion functions that accumulate
# into the name `seqdic`.
seqdic = defaultdict(int)

if __name__ == "__main__":
    # Command line: <input file> <output file> <conversion option>
    if len(sys.argv) < 4:
        sys.exit("usage: %s <input> <output> <option>" % sys.argv[0])
    option = sys.argv[3]

    # Maps each accepted option to the function performing the conversion.
    converters = {
        "fasta2tabular": readfasta_writetabular,
        "tabular2fasta": readtabular_writefasta,
        "tabular2fastaweight": readtabular_writefastaweighted,
        "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
        "fastaweight2fasta": readfastaeighted_writefasta,
    }
    if option not in converters:
        # Fail loudly instead of exiting successfully without writing output.
        sys.exit(
            "unknown option %r (expected one of: %s)"
            % (option, ", ".join(sorted(converters)))
        )
    converters[option](sys.argv[1], sys.argv[2])