comparison find_str.py @ 29:efc775ab30fe draft

planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author fubar
date Fri, 19 Jul 2024 23:32:59 +0000
parents 26e9575c2c83
children 53c4f91c6031
comparison
equal deleted inserted replaced
28:4cb6cc083620 29:efc775ab30fe
1 import argparse 1 import argparse
2 import shutil 2 import shutil
3 import subprocess 3 import subprocess
4 4
5 import pybigtools
6 import pytrf # 1.3.0 5 import pytrf # 1.3.0
7 from pyfastx import Fastx # 0.5.2 6 from pyfastx import Fastx # 0.5.2
8 7
9 """ 8 """
10 Allows all STR or those for a subset of motifs to be written to a bed file 9 Allows all STR or those for a subset of motifs to be written to a bed file
11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
12 """ 11 """
13 12
14 13
15 def getDensity(name, bed, chrlen, winwidth): 14 def getDensity(name, bed, chrlen, winwidth):
15 """
16 pybigtools can write bigwigs and they are processed by other ucsc tools - but jb2 will not read them.
17 Swapped the conversion to use a bedgraph file processed by bedGraphToBigWig
18 """
16 nwin = int(chrlen / winwidth) 19 nwin = int(chrlen / winwidth)
17 d = [0.0 for x in range(nwin + 1)] 20 d = [0.0 for x in range(nwin + 1)]
18 for b in bed: 21 for b in bed:
19 nt = b[5] 22 nt = b[5]
20 bin = int(b[1] / winwidth) 23 bin = int(b[1] / winwidth)
21 d[bin] += nt 24 d[bin] += nt
22 dw = [ 25 bedg = [
23 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) 26 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x]))
24 for x in range(nwin + 1) 27 for x in range(nwin + 1)
25 if (x + 1) * winwidth <= chrlen 28 if (x + 1) * winwidth <= chrlen
26 ] 29 ]
27 return dw 30 return bedg
28 31
29 32
30 def write_ssrs(args): 33 def write_ssrs(args):
31 """ 34 """
32 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats 35 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats
84 wig += w 87 wig += w
85 bed += cbed 88 bed += cbed
86 if args.bigwig: 89 if args.bigwig:
87 wig.sort() 90 wig.sort()
88 bedg = ['%s %d %d %.3f' % x for x in wig] 91 bedg = ['%s %d %d %.3f' % x for x in wig]
89 # bedg.insert(0,'track type=bedGraph') https://genomebrowser.wustl.edu/goldenPath/help/bigWig.html
90 with open("temp.bedg", "w") as bw: 92 with open("temp.bedg", "w") as bw:
91 bw.write('\n'.join(bedg)) 93 bw.write('\n'.join(bedg))
92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] 94 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()]
93 with open("temp.chromlen", "w") as cl: 95 with open("temp.chromlen", "w") as cl:
94 cl.write('\n'.join(chroms)) 96 cl.write('\n'.join(chroms))
95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] 97 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ]
96 subprocess.run(cmd) 98 subprocess.run(cmd)
97 #bw = pybigtools.open("temp.bw", "w")
98 #bw.write(chrlens, wig)
99 shutil.move("temp.bw", args.bed) 99 shutil.move("temp.bw", args.bed)
100 else: 100 else:
101 bed.sort() 101 bed.sort()
102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
103 with open(args.bed, "w") as outbed: 103 with open(args.bed, "w") as outbed: