Mercurial > repos > drosofff > msp_fasta_tabular_converter
annotate fasta_tabular_converter.py @ 1:2f7278120be9 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
author | drosofff |
---|---|
date | Tue, 22 Mar 2016 18:54:00 -0400 |
parents | 951cb6b3979b |
children | 330dd8a8c31a |
rev | line source |
---|---|
0
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
2 # |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 import sys |
1
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
4 import string |
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
5 import argparse |
0
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
6 from collections import defaultdict |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
7 |
1
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
def Parser():
    """Build the command-line parser and parse the process arguments.

    Returns:
        argparse.Namespace with the attributes ``input``, ``output`` and
        ``type``; each is ``None`` when its option is omitted.

    argparse prints a usage message and exits when ``--type`` is given a
    value that is not one of the conversions listed in ``choices``.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", type=str, help="input file")
    the_parser.add_argument(
        '--output', action="store", type=str, help="output converted file")
    the_parser.add_argument(
        '--type', action="store", type=str,
        # Reject unknown conversion names up front instead of letting the
        # script finish without writing any output.
        choices=["fasta2tabular", "tabular2fasta", "tabular2fastaweight",
                 "fastaweight2fastaweight", "fastaweight2fasta"],
        help="type of conversion")
    return the_parser.parse_args()
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
18 |
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
def readfasta_writetabular(fasta, tabular, mode="oneline"):
    """Collapse a fasta file into a "sequence<TAB>count" table.

    The sequence lines of each record are concatenated into one string,
    identical sequences are counted, and one row per distinct sequence is
    written, ordered by decreasing count.

    Args:
        fasta: path of the fasta file to read.
        tabular: path of the tab-separated file to write.
        mode: accepted for backward compatibility; it is not used.

    Lines found before the first ``>`` header are ignored, and an input
    without any header produces an empty output file.
    """
    # Local tally: the function no longer needs a module-level ``seqdic``
    # and repeated calls do not accumulate counts from earlier files.
    counts = defaultdict(int)
    chunks = None  # stays None until the first header has been seen
    with open(fasta, "r") as handle:
        for line in handle:
            # Strip only the line terminator so a final line without a
            # trailing newline keeps its last base.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                if chunks is not None:
                    # A new header closes the previous record.
                    counts["".join(chunks)] += 1
                chunks = []
            elif chunks is not None:
                chunks.append(line)
    if chunks is not None:
        # The last record has no following header to close it.
        counts["".join(chunks)] += 1
    with open(tabular, "w") as out:
        for seq in sorted(counts, key=counts.get, reverse=True):
            out.write("%s\t%s\n" % (seq, counts[seq]))
0
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
35 |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
36 |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def readtabular_writefasta(tabular, fasta):
    """Expand a "sequence<whitespace>count" table into a fasta file.

    Each sequence is written ``count`` times; headers are consecutive
    integers starting at 1 and running across the whole file.

    Args:
        tabular: path of the table to read (first field is the sequence,
            second field the integer count).
        fasta: path of the fasta file to write.

    Raises:
        IndexError: a non-blank row has fewer than two fields.
        ValueError: the second field is not an integer.
    """
    counter = 0
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no record.
                continue
            for _ in range(int(fields[1])):
                counter += 1
                out.write(">%s\n%s\n" % (counter, fields[0]))
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
48 |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def readtabular_writefastaweighted(tabular, fasta):
    """Convert a "sequence<whitespace>count" table into weighted fasta.

    Every row becomes one record whose header is ``>{rank}_{count}``, where
    ``rank`` is the 1-based position of the row among the non-blank rows.

    Args:
        tabular: path of the table to read.
        fasta: path of the weighted fasta file to write.

    Raises:
        IndexError: a non-blank row has fewer than two fields.
    """
    counter = 0
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            # split() already discards the line terminator; slicing off the
            # last character first would cut the final digit of the count
            # when the last row has no trailing newline.
            fields = line.split()
            if not fields:
                continue  # blank line, no record
            counter += 1
            out.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
59 |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Re-collapse a weighted fasta file so each sequence appears once.

    Headers are expected to end in ``_<weight>``. The weights of identical
    sequence lines are summed and the records are rewritten by decreasing
    total weight with headers ``>{rank}_{total}``. The sum of all header
    weights is reported on standard output.

    Args:
        fastaweigthed_input: path of the weighted fasta file to read.
        fastaweigthed_reparsed: path of the collapsed file to write.

    Raises:
        ValueError: a header does not end in an integer weight, or a
            sequence line appears before any header.
    """
    # Local tally: no dependency on a module-level ``seqdic`` and no
    # carry-over of counts between calls.
    counts = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as handle:
        for line in handle:
            # Remove only the terminator so an unterminated last line is
            # not truncated.
            line = line.rstrip("\r\n")
            if not line:
                continue  # a blank line is not an (empty) sequence
            if line.startswith(">"):
                weight = int(line[1:].split("_")[-1])
                number_reads += weight
            else:
                if weight is None:
                    raise ValueError(
                        "sequence line found before any fasta header")
                # Each sequence line is tallied on its own under the weight
                # of the most recent header.
                counts[line] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        ranked = sorted(counts, key=counts.get, reverse=True)
        for n, seq in enumerate(ranked, 1):
            out.write(">%s_%s\n%s\n" % (n, counts[seq], seq))
    print("%s reads collapsed" % number_reads)
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
77 |
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted fasta file into a plain fasta file.

    Headers are expected to end in ``_<weight>``. Each sequence line is
    written ``weight`` times, using the weight of the most recent header;
    output headers are consecutive integers starting at 1.

    Args:
        fastaweigthed: path of the weighted fasta file to read.
        fasta: path of the expanded fasta file to write.

    Raises:
        ValueError: a header does not end in an integer weight, or a
            sequence line appears before any header.
    """
    counter = 0
    weight = None
    with open(fastaweigthed, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            # Remove only the terminator so an unterminated last line keeps
            # its final character.
            line = line.rstrip("\r\n")
            if not line:
                continue  # blank lines carry no record
            if line.startswith(">"):
                weight = int(line[1:].split("_")[-1])
            else:
                if weight is None:
                    raise ValueError(
                        "sequence line found before any fasta header")
                for _ in range(weight):
                    counter += 1
                    out.write(">%s\n%s\n" % (counter, line))
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
92 |
1
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
def main(input, output, type):
    """Run the conversion named by ``type`` from ``input`` to ``output``.

    Args:
        input: path of the file to convert.
        output: path of the converted file to write.
        type: one of "fasta2tabular", "tabular2fasta",
            "tabular2fastaweight", "fastaweight2fastaweight" or
            "fastaweight2fasta".

    Raises:
        ValueError: ``type`` is not one of the conversions above. Without
            this, an unknown value would finish without writing anything.
    """
    if type == "fasta2tabular":
        readfasta_writetabular(input, output)
    elif type == "tabular2fasta":
        readtabular_writefasta(input, output)
    elif type == "tabular2fastaweight":
        readtabular_writefastaweighted(input, output)
    elif type == "fastaweight2fastaweight":
        readfastaeighted_writefastaweighted(input, output)
    elif type == "fastaweight2fasta":
        readfastaeighted_writefasta(input, output)
    else:
        raise ValueError("unknown conversion type: %r" % (type,))
0
951cb6b3979b
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
104 |
1
2f7278120be9
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
drosofff
parents:
0
diff
changeset
|
if __name__ == "__main__":
    # Script entry point. The module-level sequence tally is created first
    # because converter functions in this file may refer to it as a global.
    seqdic = defaultdict(int)
    options = Parser()
    main(options.input, options.output, options.type)