Mercurial > repos > fubar > microsatbed
comparison find_str.py @ 24:94c5f834c0cc draft
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author | fubar |
---|---|
date | Fri, 19 Jul 2024 05:20:35 +0000 |
parents | 45f690db0eaf |
children | 8d0b8a75350f |
comparison
equal
deleted
inserted
replaced
23:45f690db0eaf | 24:94c5f834c0cc |
---|---|
1 import argparse | 1 import argparse |
2 import shutil | 2 import shutil |
3 | 3 |
4 import pybigtools | 4 import pybigtools |
5 | |
6 import pytrf # 1.3.0 | 5 import pytrf # 1.3.0 |
7 from pyfastx import Fastx # 0.5.2 | 6 from pyfastx import Fastx # 0.5.2 |
8 | 7 |
9 """ | 8 """ |
10 Allows all STR or those for a subset of motifs to be written to a bed file | 9 Allows all STR or those for a subset of motifs to be written to a bed file |
11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
12 """ | 11 """ |
13 | 12 |
13 | |
14 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): |
15 nwin = int(chrlen/winwidth) | 15 nwin = int(chrlen / winwidth) |
16 d = [0.0 for x in range(nwin+1)] | 16 d = [0.0 for x in range(nwin + 1)] |
17 for b in bed: | 17 for b in bed: |
18 nt = b[5] | 18 nt = b[5] |
19 bin = int(b[1]/winwidth) | 19 bin = int(b[1] / winwidth) |
20 d[bin] += nt | 20 d[bin] += nt |
21 dw = [(name,x*winwidth,(x+1)*winwidth,float(d[x])) for x in range(nwin+1) if (x+1)*winwidth <= chrlen] | 21 dw = [ |
22 (name, (x * winwidth)+1, (x + 1) * winwidth, float(d[x])) | |
23 for x in range(nwin + 1) | |
24 if (x + 1) * winwidth <= chrlen | |
25 ] | |
22 return dw | 26 return dw |
27 | |
23 | 28 |
24 def write_ssrs(args): | 29 def write_ssrs(args): |
25 """ | 30 """ |
26 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats | 31 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats |
27 ssrs = pytrf.STRFinder(name, seq, 10, 6, 4, 3, 3, 3) | 32 ssrs = pytrf.STRFinder(name, seq, 10, 6, 4, 3, 3, 3) |
37 specific = args.specific.upper().split(",") | 42 specific = args.specific.upper().split(",") |
38 fa = Fastx(args.fasta, uppercase=True) | 43 fa = Fastx(args.fasta, uppercase=True) |
39 for name, seq in fa: | 44 for name, seq in fa: |
40 cbed = [] | 45 cbed = [] |
41 for ssr in pytrf.STRFinder( | 46 for ssr in pytrf.STRFinder( |
42 name, | 47 name, |
43 seq, | 48 seq, |
44 args.monomin, | 49 args.monomin, |
45 args.dimin, | 50 args.dimin, |
46 args.trimin, | 51 args.trimin, |
47 args.tetramin, | 52 args.tetramin, |
48 args.pentamin, | 53 args.pentamin, |
49 args.hexamin, | 54 args.hexamin, |
50 ): | 55 ): |
51 row = ( | 56 row = ( |
52 ssr.chrom, | 57 ssr.chrom, |
53 ssr.start, | 58 ssr.start, |
54 ssr.end, | 59 ssr.end, |
55 ssr.motif, | 60 ssr.motif, |
71 cbed.append(row) | 76 cbed.append(row) |
72 elif args.hexa and len(ssr.motif) == 6: | 77 elif args.hexa and len(ssr.motif) == 6: |
73 cbed.append(row) | 78 cbed.append(row) |
74 bed += cbed | 79 bed += cbed |
75 if args.bigwig: | 80 if args.bigwig: |
76 chrlens[name] = len(seq) | 81 chrlen = len(seq) |
77 w = getDensity(name, cbed, len(seq), args.winwidth) | 82 chrlens[name] = chrlen |
83 w = getDensity(name, cbed, chrlen, args.winwidth) | |
78 wig += w | 84 wig += w |
79 if args.bigwig: | 85 if args.bigwig: |
80 wig.sort() | 86 wig.sort() |
81 bw = pybigtools.open("temp.bw", 'w') | 87 bw = pybigtools.open("temp.bw", "w") |
82 bw.write(chrlens,wig) | 88 bw.write(chrlens, wig) |
89 bw.close() | |
83 shutil.move("temp.bw", args.bed) | 90 shutil.move("temp.bw", args.bed) |
84 else: | 91 else: |
85 bed.sort() | 92 bed.sort() |
86 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | 93 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] |
87 with open(args.bed, "w") as outbed: | 94 with open(args.bed, "w") as outbed: |