diff find_str.py @ 24:94c5f834c0cc draft

planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
author fubar
date Fri, 19 Jul 2024 05:20:35 +0000
parents 45f690db0eaf
children 8d0b8a75350f
line wrap: on
line diff
--- a/find_str.py	Wed Jul 17 23:19:12 2024 +0000
+++ b/find_str.py	Fri Jul 19 05:20:35 2024 +0000
@@ -2,7 +2,6 @@
 import shutil
 
 import pybigtools
-
 import pytrf  # 1.3.0
 from pyfastx import Fastx  # 0.5.2
 
@@ -11,16 +10,22 @@
 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
 """
 
+
 def getDensity(name, bed, chrlen, winwidth):
-    nwin = int(chrlen/winwidth)
-    d = [0.0 for x in range(nwin+1)]
+    nwin = int(chrlen / winwidth)
+    d = [0.0 for x in range(nwin + 1)]
     for b in bed:
         nt = b[5]
-        bin = int(b[1]/winwidth)
+        bin = int(b[1] / winwidth)
         d[bin] += nt
-    dw = [(name,x*winwidth,(x+1)*winwidth,float(d[x])) for x in range(nwin+1) if (x+1)*winwidth <= chrlen]
+    dw = [
+        (name, (x * winwidth)+1, (x + 1) * winwidth, float(d[x]))
+        for x in range(nwin + 1)
+        if  (x + 1) * winwidth <= chrlen
+    ]
     return dw
 
+
 def write_ssrs(args):
     """
     The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats
@@ -39,15 +44,15 @@
     for name, seq in fa:
         cbed = []
         for ssr in pytrf.STRFinder(
-                name,
-                seq,
-                args.monomin,
-                args.dimin,
-                args.trimin,
-                args.tetramin,
-                args.pentamin,
-                args.hexamin,
-            ):
+            name,
+            seq,
+            args.monomin,
+            args.dimin,
+            args.trimin,
+            args.tetramin,
+            args.pentamin,
+            args.hexamin,
+        ):
             row = (
                 ssr.chrom,
                 ssr.start,
@@ -73,13 +78,15 @@
                 cbed.append(row)
         bed += cbed
         if args.bigwig:
-            chrlens[name] = len(seq)
-            w = getDensity(name, cbed, len(seq), args.winwidth)
+            chrlen = len(seq)
+            chrlens[name] = chrlen
+            w = getDensity(name, cbed, chrlen, args.winwidth)
             wig += w
     if args.bigwig:
         wig.sort()
-        bw = pybigtools.open("temp.bw", 'w')
-        bw.write(chrlens,wig)
+        bw = pybigtools.open("temp.bw", "w")
+        bw.write(chrlens, wig)
+        bw.close()
         shutil.move("temp.bw", args.bed)
     else:
         bed.sort()