Repository 'lumpy_sv'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/lumpy_sv

Changeset 1:1ed8619a5611 (2017-07-26)
Previous changeset 0:796552c157de (2017-07-24) Next changeset 2:6059f4cb4cf2 (2019-10-14)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f
modified:
extractSplitReads_BwaMem.py
lumpy.xml
pairend_distro.py
b
diff -r 796552c157de -r 1ed8619a5611 extractSplitReads_BwaMem.py
--- a/extractSplitReads_BwaMem.py Mon Jul 24 08:03:17 2017 -0400
+++ b/extractSplitReads_BwaMem.py Wed Jul 26 18:17:01 2017 -0400
[
b'@@ -1,12 +1,11 @@\n #!/usr/bin/env python\n \n+import re\n import sys\n-import getopt\n-import string\n from optparse import OptionParser\n-import re\n+\n \n-def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):\n+def extractSplitsFromBwaMem(inFile, numSplits, includeDups, minNonOverlap):\n     if inFile == "stdin":\n         data = sys.stdin\n     else:\n@@ -14,82 +13,89 @@\n     for line in data:\n         split = 0\n         if line[0] == \'@\':\n-            print line.strip()\n+            print(line.strip())\n             continue\n         samList = line.strip().split(\'\\t\')\n         sam = SAM(samList)\n-        if includeDups==0 and (1024 & sam.flag)==1024:\n+        if includeDups == 0 and (1024 & sam.flag) == 1024:\n             continue\n         for el in sam.tags:\n             if "SA:" in el:\n-                if(len(el.split(";")))<=numSplits:\n+                if(len(el.split(";"))) <= numSplits:\n                     split = 1\n                     mate = el.split(",")\n                     mateCigar = mate[3]\n                     mateFlag = int(0)\n-                    if mate[2]=="-": mateFlag = int(16)\n+                    if mate[2] == "-":\n+                        mateFlag = int(16)\n         if split:\n             read1 = sam.flag & 64\n-            if read1 == 64: tag = "_1"\n-            else: tag="_2"\n+            if read1 == 64:\n+                tag = "_1"\n+            else:\n+                tag = "_2"\n             samList[0] = sam.query + tag\n             readCigar = sam.cigar\n-            readCigarOps = extractCigarOps(readCigar,sam.flag)\n+            readCigarOps = extractCigarOps(readCigar, sam.flag)\n             readQueryPos = calcQueryPosFromCigar(readCigarOps)\n-            mateCigarOps = extractCigarOps(mateCigar,mateFlag)\n+            mateCigarOps = extractCigarOps(mateCigar, mateFlag)\n             mateQueryPos = calcQueryPosFromCigar(mateCigarOps)\n-            overlap = calcQueryOverlap(readQueryPos.qsPos,readQueryPos.qePos,mateQueryPos.qsPos,mateQueryPos.qePos)\n+            overlap = calcQueryOverlap(readQueryPos.qsPos, readQueryPos.qePos,\n+                                       mateQueryPos.qsPos, mateQueryPos.qePos)\n             nonOverlap1 = 1 + readQueryPos.qePos - readQueryPos.qsPos - overlap\n             nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap\n             mno = min(nonOverlap1, nonOverlap2)\n             if mno >= minNonOverlap:\n-                print "\\t".join(samList)\n+                print("\\t".join(samList))\n \n-#--------------------------------------------------------------------------------------------------\n+# -----------------------------------------------------------------------\n # functions\n-#--------------------------------------------------------------------------------------------------\n+# -----------------------------------------------------------------------\n+\n \n class SAM (object):\n     """\n     __very__ basic class for SAM input.\n     """\n-    def __init__(self, samList = []):\n+    def __init__(self, samList=[]):\n         if len(samList) > 0:\n-            self.query    = samList[0]\n-            self.flag     = int(samList[1])\n-            self.ref      = samList[2]\n-            self.pos      = int(samList[3])\n-            self.mapq     = int(samList[4])\n-            self.cigar    = samList[5]\n-            self.matRef   = samList[6]\n-            self.matePos  = int(samList[7])\n-            self.iSize    = int(samList[8])\n-            self.seq      = samList[9]\n-            self.qual     = samList[10]\n-            self.tags     = samList[11:]#tags is a list of each tag:vtype:value sets\n-            self.valid    = 1\n+            self.query = samList[0]\n+            self.flag = int(samList[1])\n+            self.ref = samList[2]\n+            self.pos = int(samList[3])\n+            self.mapq = int(samList[4])\n+            self.cigar = samList[5]\n+            self.matRef = samList[6]\n+            self.matePos = int(samList[7])\n+            self.iSize = int(samList[8]'..b'   qsPos += cigar.length\n             qePos += cigar.length\n-            qLen  += cigar.length\n+            qLen += cigar.length\n         elif opPosition > 0 and (cigar.op == \'H\' or cigar.op == \'S\'):\n-            qLen  += cigar.length\n+            qLen += cigar.length\n         elif cigar.op == \'M\' or cigar.op == \'I\':\n             qePos += cigar.length\n-            qLen  += cigar.length\n+            qLen += cigar.length\n             opPosition += 1\n-    d = queryPos(qsPos, qePos, qLen);\n+    d = queryPos(qsPos, qePos, qLen)\n     return d\n \n+\n class cigarOp (object):\n     """\n     sturct to store a discrete CIGAR operations\n     """\n     def __init__(self, opLength, op):\n         self.length = int(opLength)\n-        self.op     = op\n+        self.op = op\n+\n \n class queryPos (object):\n     """\n@@ -153,50 +163,60 @@\n     def __init__(self, qsPos, qePos, qLen):\n         self.qsPos = int(qsPos)\n         self.qePos = int(qePos)\n-        self.qLen  = int(qLen)\n+        self.qLen = int(qLen)\n \n \n-def calcQueryOverlap(s1,e1,s2,e2):\n+def calcQueryOverlap(s1, e1, s2, e2):\n     o = 1 + min(e1, e2) - max(s1, s2)\n     return max(0, o)\n \n ###############################################\n \n+\n class Usage(Exception):\n     def __init__(self, msg):\n         self.msg = msg\n \n+\n def main():\n-\n     usage = """%prog -i <file>\n \n extractSplitReads_BwaMem v0.1.0\n Author: Ira Hall\n-Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.\n+Description: Get split-read alignments from bwa-mem in lumpy compatible\n+format. Ignores reads marked as duplicates.\n Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.\n     """\n     parser = OptionParser(usage)\n \n     parser.add_option("-i", "--inFile", dest="inFile",\n-        help="A SAM file or standard input (-i stdin).",\n-        metavar="FILE")\n-    parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",\n-        help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",\n-        metavar="INT")\n-    parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,\n-        help="Include alignments marked as duplicates. Default=False")\n-    parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",\n-        help="minimum non-overlap between split alignments on the query (default=20)",\n-        metavar="INT")\n+                      help="A SAM file or standard input (-i stdin).",\n+                      metavar="FILE")\n+    parser.add_option("-n", "--numSplits", dest="numSplits", default=2,\n+                      type="int",\n+                      help=\'\'\'The maximum number of split-read mappings to\n+                      allow per read. Reads with more are excluded.\n+                      Default=2\'\'\', metavar="INT")\n+    parser.add_option("-d", "--includeDups", dest="includeDups",\n+                      action="store_true", default=0,\n+                      help=\'\'\'Include alignments marked as duplicates.\n+                      Default=False\'\'\')\n+    parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap",\n+                      default=20, type="int", help=\'\'\'minimum non-overlap between\n+                      split alignments on the query (default=20)\'\'\',\n+                      metavar="INT")\n     (opts, args) = parser.parse_args()\n     if opts.inFile is None:\n         parser.print_help()\n         print\n     else:\n         try:\n-            extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)\n+            extractSplitsFromBwaMem(opts.inFile, opts.numSplits,\n+                                    opts.includeDups, opts.minNonOverlap)\n         except IOError as err:\n-            sys.stderr.write("IOError " + str(err) + "\\n");\n+            sys.stderr.write("IOError " + str(err) + "\\n")\n             return\n+\n+\n if __name__ == "__main__":\n     sys.exit(main())\n'
b
diff -r 796552c157de -r 1ed8619a5611 lumpy.xml
--- a/lumpy.xml Mon Jul 24 08:03:17 2017 -0400
+++ b/lumpy.xml Wed Jul 26 18:17:01 2017 -0400
b
@@ -1,4 +1,4 @@
-<tool id="lumpy" name="lumpy-sv" version="1.0.0">
+<tool id="lumpy" name="lumpy-sv" version="1.0.1">
     <description>find structural variants</description>
     <requirements>
         <requirement type="package" version="0.2.13">lumpy-sv</requirement>
b
diff -r 796552c157de -r 1ed8619a5611 pairend_distro.py
--- a/pairend_distro.py Mon Jul 24 08:03:17 2017 -0400
+++ b/pairend_distro.py Wed Jul 26 18:17:01 2017 -0400
[
@@ -9,9 +9,9 @@
 #  rl6sf@virginia.edu
 
 import sys
+from optparse import OptionParser
+
 import numpy as np
-from operator import itemgetter
-from optparse import OptionParser
 
 # some constants for sam/bam field ids
 SAM_FLAG = 1
@@ -20,32 +20,16 @@
 SAM_ISIZE = 8
 
 parser = OptionParser()
-
-parser.add_option("-r",
-    "--read_length",
-    type="int",
-    dest="read_length",
-    help="Read length")
-
-parser.add_option("-X",
-    dest="X",
-    type="int",
-    help="Number of stdevs from mean to extend")
+parser.add_option("-r", "--read_length", type="int", dest="read_length",
+                  help="Read length")
+parser.add_option("-X", dest="X", type="int",
+                  help="Number of stdevs from mean to extend")
+parser.add_option("-N", dest="N", type="int", help="Number to sample")
+parser.add_option("-o", dest="output_file", help="Output file")
+parser.add_option("-m", dest="mads", type="int", default=10,
+                  help='''Outlier cutoff in # of median absolute deviations
+                          (unscaled, upper only)''')
 
-parser.add_option("-N",
-    dest="N",
-    type="int",
-    help="Number to sample")
-
-parser.add_option("-o",
-    dest="output_file",
-    help="Output file")
-
-parser.add_option("-m",
-    dest="mads",
-    type="int",
-    default=10,
-    help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")
 
 def unscaled_upper_mad(xs):
     """Return a tuple consisting of the median of xs followed by the
@@ -96,7 +80,8 @@
 # warn if very few elements in distribution
 min_elements = 1000
 if len(L) < min_elements:
-    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements))
+    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" %
+                     (len(L), min_elements))
     mean = "NA"
     stdev = "NA"
 
@@ -110,7 +95,7 @@
     new_len = len(L)
     removed = c - new_len
     sys.stderr.write("Removed %d outliers with isize >= %d\n" %
-        (removed, upper_cutoff))
+                     (removed, upper_cutoff))
     c = new_len
 
     mean = np.mean(L)
@@ -125,7 +110,7 @@
     for x in L:
         if (x >= start) and (x <= end):
             j = int(x - start)
-            H[j] = H[ int(x - start) ] + 1
+            H[j] = H[int(x - start)] + 1
             s += 1
 
     f = open(options.output_file, 'w')
@@ -133,8 +118,5 @@
     for i in range(end - start):
         o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n"
         f.write(o)
-
-
     f.close()
-
 print('mean:' + str(mean) + '\tstdev:' + str(stdev))