Mercurial > repos > fubar > microsatbed
comparison find_str.py @ 29:efc775ab30fe draft
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author | fubar |
---|---|
date | Fri, 19 Jul 2024 23:32:59 +0000 |
parents | 26e9575c2c83 |
children | 53c4f91c6031 |
comparison
equal
deleted
inserted
replaced
28:4cb6cc083620 | 29:efc775ab30fe |
---|---|
1 import argparse | 1 import argparse |
2 import shutil | 2 import shutil |
3 import subprocess | 3 import subprocess |
4 | 4 |
5 import pybigtools | |
6 import pytrf # 1.3.0 | 5 import pytrf # 1.3.0 |
7 from pyfastx import Fastx # 0.5.2 | 6 from pyfastx import Fastx # 0.5.2 |
8 | 7 |
9 """ | 8 """ |
10 Allows all STR or those for a subset of motifs to be written to a bed file | 9 Allows all STR or those for a subset of motifs to be written to a bed file |
11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
12 """ | 11 """ |
13 | 12 |
14 | 13 |
15 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): |
15 """ | |
16 pybigtools can write bigwigs and they are processed by other ucsc tools - but jb2 will not read them. | |
17 Swapped the conversion to use a bedgraph file processed by bedGraphToBigWig | |
18 """ | |
16 nwin = int(chrlen / winwidth) | 19 nwin = int(chrlen / winwidth) |
17 d = [0.0 for x in range(nwin + 1)] | 20 d = [0.0 for x in range(nwin + 1)] |
18 for b in bed: | 21 for b in bed: |
19 nt = b[5] | 22 nt = b[5] |
20 bin = int(b[1] / winwidth) | 23 bin = int(b[1] / winwidth) |
21 d[bin] += nt | 24 d[bin] += nt |
22 dw = [ | 25 bedg = [ |
23 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 26 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) |
24 for x in range(nwin + 1) | 27 for x in range(nwin + 1) |
25 if (x + 1) * winwidth <= chrlen | 28 if (x + 1) * winwidth <= chrlen |
26 ] | 29 ] |
27 return dw | 30 return bedg |
28 | 31 |
29 | 32 |
30 def write_ssrs(args): | 33 def write_ssrs(args): |
31 """ | 34 """ |
32 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats | 35 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats |
84 wig += w | 87 wig += w |
85 bed += cbed | 88 bed += cbed |
86 if args.bigwig: | 89 if args.bigwig: |
87 wig.sort() | 90 wig.sort() |
88 bedg = ['%s %d %d %.3f' % x for x in wig] | 91 bedg = ['%s %d %d %.3f' % x for x in wig] |
89 # bedg.insert(0,'track type=bedGraph') https://genomebrowser.wustl.edu/goldenPath/help/bigWig.html | |
90 with open("temp.bedg", "w") as bw: | 92 with open("temp.bedg", "w") as bw: |
91 bw.write('\n'.join(bedg)) | 93 bw.write('\n'.join(bedg)) |
92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 94 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] |
93 with open("temp.chromlen", "w") as cl: | 95 with open("temp.chromlen", "w") as cl: |
94 cl.write('\n'.join(chroms)) | 96 cl.write('\n'.join(chroms)) |
95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] | 97 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] |
96 subprocess.run(cmd) | 98 subprocess.run(cmd) |
97 #bw = pybigtools.open("temp.bw", "w") | |
98 #bw.write(chrlens, wig) | |
99 shutil.move("temp.bw", args.bed) | 99 shutil.move("temp.bw", args.bed) |
100 else: | 100 else: |
101 bed.sort() | 101 bed.sort() |
102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] |
103 with open(args.bed, "w") as outbed: | 103 with open(args.bed, "w") as outbed: |