comparison fasta_tabular_converter.py @ 1:2f7278120be9 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
author drosofff
date Tue, 22 Mar 2016 18:54:00 -0400
parents 951cb6b3979b
children 330dd8a8c31a
comparison
equal deleted inserted replaced
0:951cb6b3979b 1:2f7278120be9
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # 2 #
3 import sys 3 import sys
4 import string
5 import argparse
4 from collections import defaultdict 6 from collections import defaultdict
5 7
6 def readfasta_writetabular(fasta, tabular): 8 def Parser():
7 F = open(fasta, "r") 9 the_parser = argparse.ArgumentParser()
8 for line in F: 10 the_parser.add_argument(
9 if line[0] == ">": continue 11 '--input', action="store", type=str, help="input file")
10 else: 12 the_parser.add_argument(
11 seqdic[line[:-1]] += 1 13 '--output', action="store", type=str, help="output converted file")
12 F.close() 14 the_parser.add_argument(
13 F = open(tabular, "w") 15 '--type', action="store", type=str, help="type of convertion")
14 for seq in sorted(seqdic, key=seqdic.get, reverse=True): 16 args = the_parser.parse_args()
15 print >> F, "%s\t%s" % (seq, seqdic[seq]) 17 return args
16 F.close() 18
def readfasta_writetabular(fasta, tabular, mode="oneline"):
    """Collapse a FASTA file into a tab-separated table of sequence counts.

    The sequence lines of each record are joined into a single string,
    identical sequences are counted, and one ``sequence<TAB>count`` row is
    written per distinct sequence, most frequent first.

    Args:
        fasta: path of the FASTA file to read.
        tabular: path of the tabular file to write (overwritten).
        mode: accepted so existing callers keep working; not used here.
    """
    # Counts live in a local table so the function does not rely on a
    # module-level ``seqdic`` being defined before it is called.
    counts = defaultdict(int)
    # ``chunks`` stays None until the first ">" header is seen; this replaces
    # the former bare ``except`` around a not-yet-defined list.
    chunks = None
    with open(fasta, "r") as handle:
        for line in handle:
            if line.startswith(">"):
                if chunks is not None:
                    # A new header closes the previous record.
                    counts["".join(chunks)] += 1
                chunks = []
            elif chunks is not None:
                # Strip only the line terminator: slicing off the last
                # character would eat a base when the file does not end
                # with a newline, and would keep "\r" on CRLF files.
                chunks.append(line.rstrip("\r\n"))
            # Text located before the first header belongs to no record
            # and is ignored.
    if chunks is not None:
        # The last record has no following header to close it.
        counts["".join(chunks)] += 1
    with open(tabular, "w") as out:
        for seq in sorted(counts, key=counts.get, reverse=True):
            # file.write behaves the same under Python 2 and Python 3.
            out.write("%s\t%s\n" % (seq, counts[seq]))
17 35
18 36
19 def readtabular_writefasta(tabular, fasta): 37 def readtabular_writefasta(tabular, fasta):
20 F = open(tabular, "r") 38 F = open(tabular, "r")
21 Fw = open(fasta, "w") 39 Fw = open(fasta, "w")
70 counter += 1 88 counter += 1
71 print >> Fw, ">%s\n%s" % (counter, seq) 89 print >> Fw, ">%s\n%s" % (counter, seq)
72 F.close() 90 F.close()
73 Fw.close() 91 Fw.close()
74 92
def main(input, output, type):
    """Run the conversion selected by ``type`` from ``input`` to ``output``.

    Args:
        input: path of the file to convert.
        output: path of the converted file to write.
        type: conversion name; one of "fasta2tabular", "tabular2fasta",
            "tabular2fastaweight", "fastaweight2fastaweight" or
            "fastaweight2fasta".

    Raises:
        ValueError: if ``type`` is not one of the names listed above.
    """
    # The parameter names shadow builtins but are part of the signature
    # callers use, so they are kept as-is.
    if type == "fasta2tabular":
        readfasta_writetabular(input, output)
    elif type == "tabular2fasta":
        readtabular_writefasta(input, output)
    elif type == "tabular2fastaweight":
        readtabular_writefastaweighted(input, output)
    elif type == "fastaweight2fastaweight":
        readfastaeighted_writefastaweighted(input, output)
    elif type == "fastaweight2fasta":
        readfastaeighted_writefasta(input, output)
    else:
        # Previously an unrecognised type fell through every branch and the
        # script finished without converting anything or reporting why.
        raise ValueError("unknown conversion type: %r" % (type,))
75 104
76 seqdic = defaultdict(int) 105 if __name__ == "__main__":
77 option = sys.argv[3] 106 seqdic = defaultdict(int)
78 107 args = Parser()
79 if option == "fasta2tabular": 108 main (args.input, args.output, args.type)
80 readfasta_writetabular(sys.argv[1], sys.argv[2])
81 elif option == "tabular2fasta":
82 readtabular_writefasta(sys.argv[1], sys.argv[2])
83 elif option == "tabular2fastaweight":
84 readtabular_writefastaweighted (sys.argv[1], sys.argv[2])
85 elif option == "fastaweight2fastaweight":
86 readfastaeighted_writefastaweighted(sys.argv[1], sys.argv[2])
87 elif option == "fastaweight2fasta":
88 readfastaeighted_writefasta(sys.argv[1], sys.argv[2])