# HG changeset patch
# User fubar
# Date 1721218095 0
# Node ID 410144c7b2d69e3203d66bc522726081f81f7d9f
# Parent db5523378e5c55e446c4821f41dd219a217917a4
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
diff -r db5523378e5c -r 410144c7b2d6 find_str.py
--- a/find_str.py Wed Jul 17 07:40:00 2024 +0000
+++ b/find_str.py Wed Jul 17 12:08:15 2024 +0000
@@ -1,4 +1,7 @@
import argparse
+import shutil
+
+import pybigtools
import pytrf # 1.3.0
from pyfastx import Fastx # 0.5.2
@@ -8,6 +11,15 @@
Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
"""
+def getDensity(name, bed, len, winwidth):
+ nwin = int(len / winwidth)
+ d = [0.0 for x in range(nwin+1)]
+ for b in bed:
+ nt = b[5]
+ bin = int(b[1]/winwidth)
+ d[bin] += nt
+ dw = [(name,x*winwidth,(x+1)*winwidth,float(d[x])) for x in range(nwin+1) if (x+1)*winwidth <= len]
+ return dw
def write_ssrs(args):
"""
@@ -18,11 +30,14 @@
Sequence read bias might be influenced by GC density or some other specific motif.
"""
bed = []
+ wig = []
+ chrlens = {}
specific = None
if args.specific:
specific = args.specific.upper().split(",")
fa = Fastx(args.fasta, uppercase=True)
for name, seq in fa:
+ cbed = []
for ssr in pytrf.STRFinder(
name,
seq,
@@ -43,24 +58,35 @@
)
# pytrf reports a 1 based start position so start-1 fixes the bed interval lengths
if args.specific and ssr.motif in specific:
- bed.append(row)
+ cbed.append(row)
elif args.mono and len(ssr.motif) == 1:
- bed.append(row)
+ cbed.append(row)
elif args.di and len(ssr.motif) == 2:
- bed.append(row)
+ cbed.append(row)
elif args.tri and len(ssr.motif) == 3:
- bed.append(row)
+ cbed.append(row)
elif args.tetra and len(ssr.motif) == 4:
- bed.append(row)
+ cbed.append(row)
elif args.penta and len(ssr.motif) == 5:
- bed.append(row)
+ cbed.append(row)
elif args.hexa and len(ssr.motif) == 6:
- bed.append(row)
- bed.sort()
- obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
- with open(args.bed, "w") as outbed:
- outbed.write("\n".join(obed))
- outbed.write("\n")
+ cbed.append(row)
+ bed += cbed
+ if args.bigwig:
+ chrlens[name] = len(seq)
+ w = getDensity(name, cbed, len(seq), args.winwidth)
+ wig += w
+ if args.bigwig:
+ wig.sort()
+ bw = pybigtools.open("temp.bw", 'w')
+ bw.write(chrlens,wig)
+ shutil.move("temp.bw", args.bed)
+ else:
+ bed.sort()
+ obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
+ with open(args.bed, "w") as outbed:
+ outbed.write("\n".join(obed))
+ outbed.write("\n")
if __name__ == "__main__":
@@ -80,6 +106,8 @@
a("--monomin", default=2, type=int)
a("-f", "--fasta", default="humsamp.fa")
a("-b", "--bed", default="humsamp.bed")
+ a("--bigwig", action="store_true")
+ a("--winwidth", default=128, type=int)
a("--specific", default=None)
a("--minreps", default=2, type=int)
args = parser.parse_args()
diff -r db5523378e5c -r 410144c7b2d6 humsamp.bed
Binary file humsamp.bed has changed
diff -r db5523378e5c -r 410144c7b2d6 microsatbed.xml
--- a/microsatbed.xml Wed Jul 17 07:40:00 2024 +0000
+++ b/microsatbed.xml Wed Jul 17 12:08:15 2024 +0000
@@ -4,6 +4,7 @@
python
pyfastx
pytrf
+ pybigtools
@@ -26,6 +27,10 @@
--bed '$bed'
#if $mode_cond.mode == "SPECIFIC":
--specific '$mode_cond.specific'
+ #elif $mode_cond.mode == "SPECIFICBW":
+ --bigwig
+ --winwidth '$mode_cond.winwidth'
+ --specific '$mode_cond.specific'
#else:
#if "MONO" in $mode_cond.subset:
--mono
@@ -52,6 +57,10 @@
--tetramin '$tetramin'
--pentamin '$pentamin'
--hexamin '$hexamin'
+ #if $mode_cond.mode == "SPECIFICBW":
+ --bigwig
+ --winwidth '$mode_cond.winwidth'
+ #end if
#end if
]]>
@@ -73,24 +82,41 @@
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -113,6 +139,8 @@
+
+
@@ -168,6 +196,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+