comparison find_str.py @ 24:94c5f834c0cc draft

planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author fubar
date Fri, 19 Jul 2024 05:20:35 +0000
parents 45f690db0eaf
children 8d0b8a75350f
comparison
equal deleted inserted replaced
23:45f690db0eaf 24:94c5f834c0cc
1 import argparse 1 import argparse
2 import shutil 2 import shutil
3 3
4 import pybigtools 4 import pybigtools
5
6 import pytrf # 1.3.0 5 import pytrf # 1.3.0
7 from pyfastx import Fastx # 0.5.2 6 from pyfastx import Fastx # 0.5.2
8 7
9 """ 8 """
10 Allows all STR or those for a subset of motifs to be written to a bed file 9 Allows all STR or those for a subset of motifs to be written to a bed file
11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
12 """ 11 """
13 12
13
14 def getDensity(name, bed, chrlen, winwidth): 14 def getDensity(name, bed, chrlen, winwidth):
15 nwin = int(chrlen/winwidth) 15 nwin = int(chrlen / winwidth)
16 d = [0.0 for x in range(nwin+1)] 16 d = [0.0 for x in range(nwin + 1)]
17 for b in bed: 17 for b in bed:
18 nt = b[5] 18 nt = b[5]
19 bin = int(b[1]/winwidth) 19 bin = int(b[1] / winwidth)
20 d[bin] += nt 20 d[bin] += nt
21 dw = [(name,x*winwidth,(x+1)*winwidth,float(d[x])) for x in range(nwin+1) if (x+1)*winwidth <= chrlen] 21 dw = [
22 (name, (x * winwidth)+1, (x + 1) * winwidth, float(d[x]))
23 for x in range(nwin + 1)
24 if (x + 1) * winwidth <= chrlen
25 ]
22 return dw 26 return dw
27
23 28
24 def write_ssrs(args): 29 def write_ssrs(args):
25 """ 30 """
26 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats 31 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats
27 ssrs = pytrf.STRFinder(name, seq, 10, 6, 4, 3, 3, 3) 32 ssrs = pytrf.STRFinder(name, seq, 10, 6, 4, 3, 3, 3)
37 specific = args.specific.upper().split(",") 42 specific = args.specific.upper().split(",")
38 fa = Fastx(args.fasta, uppercase=True) 43 fa = Fastx(args.fasta, uppercase=True)
39 for name, seq in fa: 44 for name, seq in fa:
40 cbed = [] 45 cbed = []
41 for ssr in pytrf.STRFinder( 46 for ssr in pytrf.STRFinder(
42 name, 47 name,
43 seq, 48 seq,
44 args.monomin, 49 args.monomin,
45 args.dimin, 50 args.dimin,
46 args.trimin, 51 args.trimin,
47 args.tetramin, 52 args.tetramin,
48 args.pentamin, 53 args.pentamin,
49 args.hexamin, 54 args.hexamin,
50 ): 55 ):
51 row = ( 56 row = (
52 ssr.chrom, 57 ssr.chrom,
53 ssr.start, 58 ssr.start,
54 ssr.end, 59 ssr.end,
55 ssr.motif, 60 ssr.motif,
71 cbed.append(row) 76 cbed.append(row)
72 elif args.hexa and len(ssr.motif) == 6: 77 elif args.hexa and len(ssr.motif) == 6:
73 cbed.append(row) 78 cbed.append(row)
74 bed += cbed 79 bed += cbed
75 if args.bigwig: 80 if args.bigwig:
76 chrlens[name] = len(seq) 81 chrlen = len(seq)
77 w = getDensity(name, cbed, len(seq), args.winwidth) 82 chrlens[name] = chrlen
83 w = getDensity(name, cbed, chrlen, args.winwidth)
78 wig += w 84 wig += w
79 if args.bigwig: 85 if args.bigwig:
80 wig.sort() 86 wig.sort()
81 bw = pybigtools.open("temp.bw", 'w') 87 bw = pybigtools.open("temp.bw", "w")
82 bw.write(chrlens,wig) 88 bw.write(chrlens, wig)
89 bw.close()
83 shutil.move("temp.bw", args.bed) 90 shutil.move("temp.bw", args.bed)
84 else: 91 else:
85 bed.sort() 92 bed.sort()
86 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] 93 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
87 with open(args.bed, "w") as outbed: 94 with open(args.bed, "w") as outbed: