comparison fasta_tabular_converter.py @ 0:951cb6b3979b draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Sun, 21 Jun 2015 14:28:49 -0400
parents
children 2f7278120be9
comparison
equal deleted inserted replaced
-1:000000000000 0:951cb6b3979b
1 #!/usr/bin/python
2 #
3 import sys
4 from collections import defaultdict
5
def readfasta_writetabular(fasta, tabular):
    """Collapse a FASTA file into a "sequence<TAB>count" table.

    Every non-header line of *fasta* is treated as one complete sequence
    (lines starting with ">" are skipped).  Identical sequences are counted
    and written to *tabular*, one per line, ordered by decreasing count.

    Args:
        fasta: path of the FASTA file to read.
        tabular: path of the tab-separated file to write.
    """
    # A local table keeps repeated calls independent of each other instead
    # of accumulating into shared module-level state.
    counts = defaultdict(int)
    with open(fasta, "r") as handle:
        for line in handle:
            # Strip only the line terminator: slicing off the last character
            # would eat a base when the final line has no newline.
            sequence = line.rstrip("\r\n")
            if not sequence or sequence.startswith(">"):
                # Empty lines are not sequences; headers carry no data here.
                continue
            counts[sequence] += 1
    with open(tabular, "w") as out:
        for sequence in sorted(counts, key=counts.get, reverse=True):
            out.write("%s\t%s\n" % (sequence, counts[sequence]))
17
18
def readtabular_writefasta(tabular, fasta):
    """Expand a "sequence<whitespace>count" table into a plain FASTA file.

    Each row of *tabular* yields ``count`` FASTA records carrying the same
    sequence.  Records are named with a running integer starting at 1 that
    keeps increasing across rows.

    Args:
        tabular: path of the table to read (first field is the sequence,
            second field is an integer count).
        fasta: path of the FASTA file to write.
    """
    read_id = 0
    with open(tabular, "r") as table:
        with open(fasta, "w") as out:
            for line in table:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing empty line) hold no row.
                    continue
                sequence = fields[0]
                for _ in range(int(fields[1])):
                    read_id += 1
                    out.write(">%s\n%s\n" % (read_id, sequence))
30
def readtabular_writefastaweighted (tabular, fasta):
    """Convert a "sequence<whitespace>count" table into weighted FASTA.

    Each row of *tabular* becomes one FASTA record whose header is
    ``>{row number}_{count}`` followed by the sequence on the next line.

    Args:
        tabular: path of the table to read (first field is the sequence,
            second field is the count).
        fasta: path of the weighted FASTA file to write.
    """
    record_id = 0
    with open(tabular, "r") as table:
        with open(fasta, "w") as out:
            for line in table:
                # split() already discards the line terminator; dropping the
                # last character first would truncate the count of a final
                # line that has no newline (e.g. "12" -> "1").
                fields = line.split()
                if not fields:
                    # Blank lines hold no row and must not consume an id.
                    continue
                record_id += 1
                out.write(">%s_%s\n%s\n" % (record_id, fields[1], fields[0]))
41
def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Re-collapse a weighted FASTA file so each sequence appears once.

    Headers are expected to end in ``_<integer weight>``.  Every sequence
    line adds the weight of the most recent header to that sequence's total.
    The output lists each distinct sequence once, by decreasing total, with
    headers of the form ``>{rank}_{total}``.  The sum of all header weights
    is reported on standard output as "<n> reads collapsed".

    Args:
        fastaweigthed_input: path of the weighted FASTA file to read.
        fastaweigthed_reparsed: path of the weighted FASTA file to write.

    Raises:
        ValueError: if a sequence line appears before any header, or a
            header does not end in an integer weight.
    """
    # A local table keeps repeated calls independent of each other instead
    # of accumulating into shared module-level state.
    totals = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as handle:
        for line in handle:
            # Strip only the terminator so an unterminated final line is
            # not shortened by one character.
            line = line.rstrip("\r\n")
            if not line:
                continue
            if line.startswith(">"):
                weight = int(line[1:].split("_")[-1])
                number_reads += weight
            else:
                if weight is None:
                    raise ValueError(
                        "sequence line found before any FASTA header in %s"
                        % fastaweigthed_input)
                totals[line] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        ranked = sorted(totals, key=totals.get, reverse=True)
        for rank, sequence in enumerate(ranked, 1):
            out.write(">%s_%s\n%s\n" % (rank, totals[sequence], sequence))
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%s reads collapsed" % number_reads)
59
def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into a plain FASTA file.

    Headers are expected to end in ``_<integer weight>``.  Each sequence
    line is written ``weight`` times, using the weight of the most recent
    header, and every emitted record is named with a running integer
    starting at 1.

    Args:
        fastaweigthed: path of the weighted FASTA file to read.
        fasta: path of the plain FASTA file to write.

    Raises:
        ValueError: if a sequence line appears before any header, or a
            header does not end in an integer weight.
    """
    read_id = 0
    weight = None
    with open(fastaweigthed, "r") as handle:
        with open(fasta, "w") as out:
            for line in handle:
                # Strip only the terminator so an unterminated final line
                # is not shortened by one character.
                line = line.rstrip("\r\n")
                if not line:
                    continue
                if line.startswith(">"):
                    weight = int(line[1:].split("_")[-1])
                    continue
                if weight is None:
                    raise ValueError(
                        "sequence line found before any FASTA header in %s"
                        % fastaweigthed)
                for _ in range(weight):
                    read_id += 1
                    out.write(">%s\n%s\n" % (read_id, line))
74
75
# Shared sequence -> count table, kept at module level for any function in
# this file that tallies into it.
seqdic = defaultdict(int)


def main(argv=None):
    """Run the conversion selected on the command line.

    Expected arguments: ``<input path> <output path> <option>`` where option
    is one of "fasta2tabular", "tabular2fasta", "tabular2fastaweight",
    "fastaweight2fastaweight" or "fastaweight2fasta".

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Exits with an error message (non-zero status) when arguments are
    missing or the option is not recognised, instead of failing with an
    IndexError or silently producing no output.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit("usage: fasta_tabular_converter.py <input> <output> <option>")
    source, destination, option = argv[1], argv[2], argv[3]

    # Built after the argument check so a usage error is reported first.
    converters = {
        "fasta2tabular": readfasta_writetabular,
        "tabular2fasta": readtabular_writefasta,
        "tabular2fastaweight": readtabular_writefastaweighted,
        "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
        "fastaweight2fasta": readfastaeighted_writefasta,
    }
    if option not in converters:
        sys.exit("unknown conversion option: %r (expected one of: %s)"
                 % (option, ", ".join(sorted(converters))))
    converters[option](source, destination)


if __name__ == "__main__":
    main()