annotate cherry_pick_fasta.py @ 8:ee689b6999d5 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/cherry_pick_fasta commit 6cbabbaa6706494c530833f0fb0cbeebce4f150b
author artbio
date Wed, 11 Oct 2023 14:27:09 +0000
parents 6c0aefd9fee3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
1 import argparse
7
6c0aefd9fee3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit 849d6d2087dadb81f1b790e3bcb5bda40c3c83af
artbio
parents: 6
diff changeset
2 from collections import defaultdict
0
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
3
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
4
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
def Parser():
    '''
    Build the command-line interface and parse the program arguments.

    Returns the argparse.Namespace produced by parse_args(), with the
    attributes input, searchfor, mode, query_string, query_file and output.
    Arguments are read from sys.argv; argparse exits the program on
    invalid command lines.
    '''
    the_parser = argparse.ArgumentParser(
        description='Cherry pick fasta sequences')
    the_parser.add_argument('--input', action='store', type=str,
                            help='input fasta file')
    # Only 'with' and 'without' are handled by the main function.
    the_parser.add_argument('--searchfor', action='store', type=str,
                            help='with or without')
    the_parser.add_argument('--mode', action='store', type=str,
                            default='includes', help='exact or includes')
    # Help texts are built with implicit string concatenation rather than
    # backslash continuations, which would embed the source indentation
    # inside the message.
    the_parser.add_argument('--query-string', dest='query_string',
                            action='store', type=str,
                            help='headers containing the string will be '
                                 'extracted or excluded as well as the '
                                 'corresponding sequence')
    the_parser.add_argument('--query-file', dest='query_file',
                            action='store', type=str,
                            help='headers containing any of the strings '
                                 'provided in the text file (1 string per '
                                 'line) will be extracted or excluded as '
                                 'well as the corresponding sequence')
    the_parser.add_argument('--output', action='store', type=str,
                            help='output fasta file')
    args = the_parser.parse_args()
    return args
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
29
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
30
3
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
def parse_fasta_dict(query, fasta_dict, mode):
    '''
    Return the entries of fasta_dict whose header matches the query.

    query: a single string, or a list of strings.
    fasta_dict: dict mapping fasta headers to sequences.
    mode: 'includes' selects headers that contain at least one query
          string as a substring; 'exact' selects headers equal to one of
          the query strings.

    The returned dict keeps the iteration order of fasta_dict.
    Raises ValueError when mode is neither 'includes' nor 'exact'.
    '''
    if not isinstance(query, list):
        query = [query]
    keywords = set(query)

    if mode == 'includes':
        sizes = {len(word) for word in keywords}

        def has_keyword(header):
            # Slide a window of each keyword length over the header and
            # look the window up in the keyword set. The cost per header
            # does not grow with the number of keywords, and no index of
            # every header substring has to be kept in memory.
            return any(header[start:start + size] in keywords
                       for size in sizes
                       for start in range(len(header) - size + 1))

        return {header: sequence
                for header, sequence in fasta_dict.items()
                if has_keyword(header)}

    if mode == 'exact':
        return {header: sequence
                for header, sequence in fasta_dict.items()
                if header in keywords}

    # Previously an unknown mode fell through and returned None, which
    # only failed later with an unrelated error.
    raise ValueError("mode must be 'includes' or 'exact', got %r" % (mode,))
1
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
75
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
76
3
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
def complement_fasta_dict(fasta_dict, subfasta_dict):
    '''
    Return the entries of fasta_dict whose header is not in subfasta_dict.

    fasta_dict: dict mapping fasta headers to sequences.
    subfasta_dict: dict whose keys are the headers to leave out.

    The returned dict keeps the iteration order of fasta_dict.
    '''
    # Membership is tested directly against the dict (constant time per
    # header) instead of against a list of the complement ids.
    return {header: sequence
            for header, sequence in fasta_dict.items()
            if header not in subfasta_dict}
1
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
83
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
84
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
def getquerylist(file):
    '''
    Read the query strings from a text file, one string per line.

    file: path of the text file.

    Trailing whitespace (including the newline) is removed from each line.
    Lines that are empty after stripping are skipped: an empty query
    string is a substring of every header and would select all sequences.
    Returns the list of query strings in file order.
    '''
    querylist = []
    # The context manager closes the handle, which was previously leaked.
    with open(file, 'r') as handle:
        for line in handle:
            keyword = line.rstrip()
            if keyword:
                querylist.append(keyword)
    return querylist
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
90
ea8fde9c6f82 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit f527add7e7bace30b8bc67524ff1da1bf920ec29"
artbio
parents: 0
diff changeset
91
3
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
def buid_fasta_dict(fasta):
    '''
    Load a fasta file into a dict mapping headers to sequences.

    fasta: path of the fasta file.

    A record starts at each line beginning with '>'; the header is the
    rest of that line and the sequence is the concatenation of the lines
    that follow, up to the next header. Lines found before the first
    header are ignored. When a header occurs several times, the last
    record wins.
    '''
    seq_dict = dict()
    header = None
    chunks = []
    # Parsing line by line keeps a '>' that appears inside a header from
    # being mistaken for the start of a new record, and the context
    # manager closes the handle.
    with open(fasta, 'r') as handle:
        for line in handle:
            line = line.rstrip('\n')
            if line.startswith('>'):
                if header is not None:
                    seq_dict[header] = ''.join(chunks)
                header = line[1:]
                chunks = []
            elif header is not None:
                chunks.append(line)
    if header is not None:
        seq_dict[header] = ''.join(chunks)
    return seq_dict
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
105
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
106
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
def write_fasta_result(fasta_dict, file, line_length=60):
    '''
    Write fasta_dict to a fasta file, records sorted by header.

    fasta_dict: dict mapping fasta headers to sequences.
    file: path of the output file (overwritten if it exists).
    line_length: maximum number of sequence characters per line
                 (default 60).

    Raises ValueError when line_length is lower than 1.
    '''
    if line_length < 1:
        raise ValueError('line_length must be a positive integer')
    with open(file, 'w') as f:
        for header in sorted(fasta_dict):
            sequence = fasta_dict[header]
            f.write('>%s\n' % header)
            # max(..., 1) keeps the single empty line that is written
            # for a record with an empty sequence.
            for start in range(0, max(len(sequence), 1), line_length):
                f.write('%s\n' % sequence[start:start + line_length])
0
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
119
e3aee4ba49c6 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit a5e865d017e0434dae013565929ad5e6e5129fd3
artbio
parents:
diff changeset
120
3
c282a8a47dd9 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cherry_pick_fasta commit d637de6c1090314bd34bdffc2fdf979cb55b870b"
artbio
parents: 2
diff changeset
def __main__():
    '''
    main function

    Parse the command line, select the fasta records whose header
    matches the query (--searchfor with) or does not match it
    (--searchfor without), and write them to the output file.

    Exits with an error message when no query is provided or when
    --searchfor is neither 'with' nor 'without'.
    '''
    args = Parser()
    # Validate the arguments first: a missing query or an unsupported
    # --searchfor value previously surfaced as an UnboundLocalError.
    if args.query_string:
        query = args.query_string
    elif args.query_file:
        query = getquerylist(args.query_file)
    else:
        raise SystemExit('either --query-string or --query-file is required')
    if args.searchfor not in ('with', 'without'):
        raise SystemExit("--searchfor must be 'with' or 'without', got %r"
                         % (args.searchfor,))
    fasta_dict = buid_fasta_dict(args.input)
    selected = parse_fasta_dict(query, fasta_dict, args.mode)
    if args.searchfor == 'with':
        fasta_result_dict = selected
    else:
        fasta_result_dict = complement_fasta_dict(fasta_dict, selected)
    write_fasta_result(fasta_result_dict, args.output)


if __name__ == '__main__':
    __main__()