Previous changeset 0:796552c157de (2017-07-24) Next changeset 2:6059f4cb4cf2 (2019-10-14) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f |
modified:
extractSplitReads_BwaMem.py lumpy.xml pairend_distro.py |
b |
diff -r 796552c157de -r 1ed8619a5611 extractSplitReads_BwaMem.py --- a/extractSplitReads_BwaMem.py Mon Jul 24 08:03:17 2017 -0400 +++ b/extractSplitReads_BwaMem.py Wed Jul 26 18:17:01 2017 -0400 |
[ |
b'@@ -1,12 +1,11 @@\n #!/usr/bin/env python\n \n+import re\n import sys\n-import getopt\n-import string\n from optparse import OptionParser\n-import re\n+\n \n-def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):\n+def extractSplitsFromBwaMem(inFile, numSplits, includeDups, minNonOverlap):\n if inFile == "stdin":\n data = sys.stdin\n else:\n@@ -14,82 +13,89 @@\n for line in data:\n split = 0\n if line[0] == \'@\':\n- print line.strip()\n+ print(line.strip())\n continue\n samList = line.strip().split(\'\\t\')\n sam = SAM(samList)\n- if includeDups==0 and (1024 & sam.flag)==1024:\n+ if includeDups == 0 and (1024 & sam.flag) == 1024:\n continue\n for el in sam.tags:\n if "SA:" in el:\n- if(len(el.split(";")))<=numSplits:\n+ if(len(el.split(";"))) <= numSplits:\n split = 1\n mate = el.split(",")\n mateCigar = mate[3]\n mateFlag = int(0)\n- if mate[2]=="-": mateFlag = int(16)\n+ if mate[2] == "-":\n+ mateFlag = int(16)\n if split:\n read1 = sam.flag & 64\n- if read1 == 64: tag = "_1"\n- else: tag="_2"\n+ if read1 == 64:\n+ tag = "_1"\n+ else:\n+ tag = "_2"\n samList[0] = sam.query + tag\n readCigar = sam.cigar\n- readCigarOps = extractCigarOps(readCigar,sam.flag)\n+ readCigarOps = extractCigarOps(readCigar, sam.flag)\n readQueryPos = calcQueryPosFromCigar(readCigarOps)\n- mateCigarOps = extractCigarOps(mateCigar,mateFlag)\n+ mateCigarOps = extractCigarOps(mateCigar, mateFlag)\n mateQueryPos = calcQueryPosFromCigar(mateCigarOps)\n- overlap = calcQueryOverlap(readQueryPos.qsPos,readQueryPos.qePos,mateQueryPos.qsPos,mateQueryPos.qePos)\n+ overlap = calcQueryOverlap(readQueryPos.qsPos, readQueryPos.qePos,\n+ mateQueryPos.qsPos, mateQueryPos.qePos)\n nonOverlap1 = 1 + readQueryPos.qePos - readQueryPos.qsPos - overlap\n nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap\n mno = min(nonOverlap1, nonOverlap2)\n if mno >= minNonOverlap:\n- print "\\t".join(samList)\n+ print("\\t".join(samList))\n \n-#--------------------------------------------------------------------------------------------------\n+# -----------------------------------------------------------------------\n # functions\n-#--------------------------------------------------------------------------------------------------\n+# -----------------------------------------------------------------------\n+\n \n class SAM (object):\n """\n __very__ basic class for SAM input.\n """\n- def __init__(self, samList = []):\n+ def __init__(self, samList=[]):\n if len(samList) > 0:\n- self.query = samList[0]\n- self.flag = int(samList[1])\n- self.ref = samList[2]\n- self.pos = int(samList[3])\n- self.mapq = int(samList[4])\n- self.cigar = samList[5]\n- self.matRef = samList[6]\n- self.matePos = int(samList[7])\n- self.iSize = int(samList[8])\n- self.seq = samList[9]\n- self.qual = samList[10]\n- self.tags = samList[11:]#tags is a list of each tag:vtype:value sets\n- self.valid = 1\n+ self.query = samList[0]\n+ self.flag = int(samList[1])\n+ self.ref = samList[2]\n+ self.pos = int(samList[3])\n+ self.mapq = int(samList[4])\n+ self.cigar = samList[5]\n+ self.matRef = samList[6]\n+ self.matePos = int(samList[7])\n+ self.iSize = int(samList[8]'..b' qsPos += cigar.length\n qePos += cigar.length\n- qLen += cigar.length\n+ qLen += cigar.length\n elif opPosition > 0 and (cigar.op == \'H\' or cigar.op == \'S\'):\n- qLen += cigar.length\n+ qLen += cigar.length\n elif cigar.op == \'M\' or cigar.op == \'I\':\n qePos += cigar.length\n- qLen += cigar.length\n+ qLen += cigar.length\n opPosition += 1\n- d = queryPos(qsPos, qePos, qLen);\n+ d = queryPos(qsPos, qePos, qLen)\n return d\n \n+\n class cigarOp (object):\n """\n sturct to store a discrete CIGAR operations\n """\n def __init__(self, opLength, op):\n self.length = int(opLength)\n- self.op = op\n+ self.op = op\n+\n \n class queryPos (object):\n """\n@@ -153,50 +163,60 @@\n def __init__(self, qsPos, qePos, qLen):\n self.qsPos = int(qsPos)\n self.qePos = int(qePos)\n- self.qLen = int(qLen)\n+ self.qLen = int(qLen)\n \n \n-def calcQueryOverlap(s1,e1,s2,e2):\n+def calcQueryOverlap(s1, e1, s2, e2):\n o = 1 + min(e1, e2) - max(s1, s2)\n return max(0, o)\n \n ###############################################\n \n+\n class Usage(Exception):\n def __init__(self, msg):\n self.msg = msg\n \n+\n def main():\n-\n usage = """%prog -i <file>\n \n extractSplitReads_BwaMem v0.1.0\n Author: Ira Hall\n-Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.\n+Description: Get split-read alignments from bwa-mem in lumpy compatible\n+format. Ignores reads marked as duplicates.\n Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.\n """\n parser = OptionParser(usage)\n \n parser.add_option("-i", "--inFile", dest="inFile",\n- help="A SAM file or standard input (-i stdin).",\n- metavar="FILE")\n- parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",\n- help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",\n- metavar="INT")\n- parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,\n- help="Include alignments marked as duplicates. Default=False")\n- parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",\n- help="minimum non-overlap between split alignments on the query (default=20)",\n- metavar="INT")\n+ help="A SAM file or standard input (-i stdin).",\n+ metavar="FILE")\n+ parser.add_option("-n", "--numSplits", dest="numSplits", default=2,\n+ type="int",\n+ help=\'\'\'The maximum number of split-read mappings to\n+ allow per read. Reads with more are excluded.\n+ Default=2\'\'\', metavar="INT")\n+ parser.add_option("-d", "--includeDups", dest="includeDups",\n+ action="store_true", default=0,\n+ help=\'\'\'Include alignments marked as duplicates.\n+ Default=False\'\'\')\n+ parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap",\n+ default=20, type="int", help=\'\'\'minimum non-overlap between\n+ split alignments on the query (default=20)\'\'\',\n+ metavar="INT")\n (opts, args) = parser.parse_args()\n if opts.inFile is None:\n parser.print_help()\n print\n else:\n try:\n- extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)\n+ extractSplitsFromBwaMem(opts.inFile, opts.numSplits,\n+ opts.includeDups, opts.minNonOverlap)\n except IOError as err:\n- sys.stderr.write("IOError " + str(err) + "\\n");\n+ sys.stderr.write("IOError " + str(err) + "\\n")\n return\n+\n+\n if __name__ == "__main__":\n sys.exit(main())\n' |
b |
diff -r 796552c157de -r 1ed8619a5611 lumpy.xml --- a/lumpy.xml Mon Jul 24 08:03:17 2017 -0400 +++ b/lumpy.xml Wed Jul 26 18:17:01 2017 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="lumpy" name="lumpy-sv" version="1.0.0"> +<tool id="lumpy" name="lumpy-sv" version="1.0.1"> <description>find structural variants</description> <requirements> <requirement type="package" version="0.2.13">lumpy-sv</requirement> |
b |
diff -r 796552c157de -r 1ed8619a5611 pairend_distro.py --- a/pairend_distro.py Mon Jul 24 08:03:17 2017 -0400 +++ b/pairend_distro.py Wed Jul 26 18:17:01 2017 -0400 |
[ |
@@ -9,9 +9,9 @@ # rl6sf@virginia.edu import sys +from optparse import OptionParser + import numpy as np -from operator import itemgetter -from optparse import OptionParser # some constants for sam/bam field ids SAM_FLAG = 1 @@ -20,32 +20,16 @@ SAM_ISIZE = 8 parser = OptionParser() - -parser.add_option("-r", - "--read_length", - type="int", - dest="read_length", - help="Read length") - -parser.add_option("-X", - dest="X", - type="int", - help="Number of stdevs from mean to extend") +parser.add_option("-r", "--read_length", type="int", dest="read_length", + help="Read length") +parser.add_option("-X", dest="X", type="int", + help="Number of stdevs from mean to extend") +parser.add_option("-N", dest="N", type="int", help="Number to sample") +parser.add_option("-o", dest="output_file", help="Output file") +parser.add_option("-m", dest="mads", type="int", default=10, + help='''Outlier cutoff in # of median absolute deviations + (unscaled, upper only)''') -parser.add_option("-N", - dest="N", - type="int", - help="Number to sample") - -parser.add_option("-o", - dest="output_file", - help="Output file") - -parser.add_option("-m", - dest="mads", - type="int", - default=10, - help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)") def unscaled_upper_mad(xs): """Return a tuple consisting of the median of xs followed by the @@ -96,7 +80,8 @@ # warn if very few elements in distribution min_elements = 1000 if len(L) < min_elements: - sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements)) + sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % + (len(L), min_elements)) mean = "NA" stdev = "NA" @@ -110,7 +95,7 @@ new_len = len(L) removed = c - new_len sys.stderr.write("Removed %d outliers with isize >= %d\n" % - (removed, upper_cutoff)) + (removed, upper_cutoff)) c = new_len mean = np.mean(L) @@ -125,7 +110,7 @@ for x in L: if (x >= start) and (x <= end): j = int(x - start) - H[j] = H[ int(x - start) ] + 1 + H[j] = H[int(x - start)] + 1 s += 1 f = open(options.output_file, 'w') @@ -133,8 +118,5 @@ for i in range(end - start): o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n" f.write(o) - - f.close() - print('mean:' + str(mean) + '\tstdev:' + str(stdev)) |