Mercurial > repos > fubar > microsatbed
annotate find_str.py @ 19:db5523378e5c draft
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author | fubar |
---|---|
date | Wed, 17 Jul 2024 07:40:00 +0000 |
parents | 264d79548d19 |
children | 410144c7b2d6 |
rev | line source |
---|---|
1
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
1 import argparse |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
2 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
3 import pytrf # 1.3.0 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
4 from pyfastx import Fastx # 0.5.2 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
5 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
6 """ |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
7 Allows all STRs, or only those for a subset of motifs, to be written to a BED file |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
8 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
9 """ |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
10 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
11 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
def write_ssrs(args):
    """
    Find short tandem repeats (STRs) in a fasta file and write them as BED rows.

    Every sequence in args.fasta is scanned with pytrf.STRFinder. The integers
    in the call change the minimum repeats for mono-, di-, tri-, tetra-,
    penta-, hexa-nucleotide repeats, for example
    ssrs = pytrf.STRFinder(name, seq, 10, 6, 4, 3, 3, 3)
    Here they are taken from args.monomin, args.dimin, args.trimin,
    args.tetramin, args.pentamin and args.hexamin.

    A repeat is kept when its motif is listed in args.specific (comma
    separated, case insensitive, blanks around the commas are ignored) or when
    the flag matching its motif length is set (args.mono, args.di, args.tri,
    args.tetra, args.penta, args.hexa). If neither a motif list nor any flag
    is given, no repeat is kept.

    The kept repeats are sorted and written to args.bed, one tab separated
    line each: chrom, start, end, motif_repeats, length. An input with no
    selected repeats produces an empty file.

    NOTE: Dinucleotides GA and AG are reported separately by https://github.com/marbl/seqrequester.
    The reversed pair STRs are about as common in the documentation sample.
    Sequence read bias might be influenced by GC density or some other specific motif.
    """
    specific = set()
    if args.specific:
        # Tolerate blanks around the commas ("GA, AG") and stray commas.
        specific = {
            motif.strip()
            for motif in args.specific.upper().split(",")
            if motif.strip()
        }
    # Motif lengths switched on by the per-class flags: 1 = mono ... 6 = hexa.
    class_flags = (args.mono, args.di, args.tri, args.tetra, args.penta, args.hexa)
    wanted_lengths = {
        length for length, enabled in enumerate(class_flags, start=1) if enabled
    }
    bed = []
    fa = Fastx(args.fasta, uppercase=True)
    for name, seq in fa:
        for ssr in pytrf.STRFinder(
            name,
            seq,
            args.monomin,
            args.dimin,
            args.trimin,
            args.tetramin,
            args.pentamin,
            args.hexamin,
        ):
            if ssr.motif not in specific and len(ssr.motif) not in wanted_lengths:
                continue
            bed.append(
                (
                    ssr.chrom,
                    # pytrf reports a 1 based start position so start-1 fixes
                    # the bed interval lengths (BED starts are 0 based and the
                    # end is exclusive, so end - start equals ssr.length).
                    ssr.start - 1,
                    ssr.end,
                    ssr.motif,
                    ssr.repeat,
                    ssr.length,
                )
            )
    bed.sort()
    with open(args.bed, "w") as outbed:
        # One newline per record, so an empty result gives an empty file
        # rather than a file holding a single blank line.
        outbed.writelines("%s\t%d\t%d\t%s_%d\t%d\n" % row for row in bed)
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
64 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
65 |
1085e094cf5f
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 7ceb6658309a7ababe622b5d92e729e5470e22f0-dirty
fubar
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command line entry point: collect the options and hand them to write_ssrs.
    parser = argparse.ArgumentParser()
    motif_classes = ("di", "tri", "tetra", "penta", "hexa", "mono")
    # One on/off switch per motif class, e.g. --di selects dinucleotide repeats.
    for motif_class in motif_classes:
        parser.add_argument("--" + motif_class, action="store_true")
    # Matching minimum repeat count per motif class, e.g. --dimin.
    for motif_class in motif_classes:
        parser.add_argument("--" + motif_class + "min", default=2, type=int)
    # Input fasta and output bed paths.
    parser.add_argument("-f", "--fasta", default="humsamp.fa")
    parser.add_argument("-b", "--bed", default="humsamp.bed")
    # Optional comma separated list of motifs to report.
    parser.add_argument("--specific", default=None)
    # NOTE(review): --minreps is parsed but write_ssrs, as visible in this
    # file, never reads it; kept so existing command lines still parse.
    parser.add_argument("--minreps", default=2, type=int)
    args = parser.parse_args()
    write_ssrs(args)