Mercurial > repos > drosofff > lumpy
annotate extractSplitReads_BwaMem.py @ 0:8b3daa745d9b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
| author | drosofff | 
|---|---|
| date | Tue, 06 Dec 2016 05:46:28 -0500 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 0 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 2 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 3 import sys | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 4 import getopt | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 5 import string | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 6 from optparse import OptionParser | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 7 import re | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 8 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
    """
    Echo '@' lines and print the SAM records that qualify as split reads.

    Every line starting with '@' is printed back (stripped).  Any other
    line is parsed as a tab-separated SAM record and printed, with "_1" or
    "_2" appended to its query name, when it carries an "SA:" tag and the
    two alignments each cover at least ``minNonOverlap`` query bases
    outside their shared overlap.

    inFile        -- path of the input file, or the literal string "stdin"
    numSplits     -- a tag only counts when len(tag.split(";")) <= numSplits
    includeDups   -- when 0, records with flag bit 1024 set are skipped
    minNonOverlap -- threshold applied to the smaller of the two
                     non-overlapping query spans
    """
    if inFile == "stdin":
        data = sys.stdin
    else:
        data = open(inFile, 'r')
    try:
        for line in data:
            record = line.strip()
            if not record:
                # A blank line has no SAM fields; skip it rather than crash.
                continue
            if line[0] == '@':
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print(record)
                continue
            samList = record.split('\t')
            sam = SAM(samList)
            if includeDups == 0 and (1024 & sam.flag) == 1024:
                continue

            # Look for a usable SA tag; if several match, the last one wins.
            isSplit = False
            mateCigar = None
            mateFlag = 0
            for el in sam.tags:
                if "SA:" in el and len(el.split(";")) <= numSplits:
                    isSplit = True
                    mate = el.split(",")
                    mateCigar = mate[3]
                    # Only the strand bit (16) is reconstructed for the mate.
                    mateFlag = 16 if mate[2] == "-" else 0
            if not isSplit:
                continue

            # Suffix the query name according to flag bit 64.
            suffix = "_1" if (sam.flag & 64) == 64 else "_2"
            samList[0] = sam.query + suffix

            readQueryPos = calcQueryPosFromCigar(extractCigarOps(sam.cigar, sam.flag))
            mateQueryPos = calcQueryPosFromCigar(extractCigarOps(mateCigar, mateFlag))
            overlap = calcQueryOverlap(readQueryPos.qsPos, readQueryPos.qePos,
                                       mateQueryPos.qsPos, mateQueryPos.qePos)
            nonOverlap1 = 1 + readQueryPos.qePos - readQueryPos.qsPos - overlap
            nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap
            if min(nonOverlap1, nonOverlap2) >= minNonOverlap:
                print("\t".join(samList))
    finally:
        # Release the handle we opened ourselves; never close stdin.
        if data is not sys.stdin:
            data.close()
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 47 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 48 #-------------------------------------------------------------------------------------------------- | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 49 # functions | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 50 #-------------------------------------------------------------------------------------------------- | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 51 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
class SAM (object):
    """
    __very__ basic class for SAM input.

    Built from the list of tab-separated fields of one SAM record.  With an
    empty (or omitted) list the instance is marked invalid: ``valid`` is 0,
    ``query`` is 'null' and ``tags`` is empty.
    """
    def __init__(self, samList=None):
        # None instead of a shared mutable [] default.
        if samList:
            self.query = samList[0]
            self.flag = int(samList[1])
            self.ref = samList[2]
            self.pos = int(samList[3])
            self.mapq = int(samList[4])
            self.cigar = samList[5]
            self.matRef = samList[6]
            self.matePos = int(samList[7])
            self.iSize = int(samList[8])
            self.seq = samList[9]
            self.qual = samList[10]
            # tags is a list of each tag:vtype:value sets
            self.tags = samList[11:]
            self.valid = 1
        else:
            self.valid = 0
            self.query = 'null'
            # Keeps extractTagValue usable on an invalid record.
            self.tags = []

    def extractTagValue(self, tagID):
        """
        Return the value of the first tag named ``tagID``, or None.

        Values of type 'i' are returned as int, values of type 'H' are
        parsed as base-16 ints, anything else is returned as a string.
        """
        for tag in self.tags:
            tagParts = tag.split(':', 2)
            # A tag without all three tag:vtype:value parts cannot match.
            if len(tagParts) != 3 or tagParts[0] != tagID:
                continue
            if tagParts[1] == 'i':
                return int(tagParts[2])
            if tagParts[1] == 'H':
                return int(tagParts[2], 16)
            return tagParts[2]
        return None
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 85 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 86 #----------------------------------------------- | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
# Matches each whole CIGAR operation as one string, e.g. "76M" or "5S".
cigarPattern = r'([0-9]+[MIDNSHP])'
cigarSearch = re.compile(cigarPattern)
# Same operations, captured as separate (length, op-letter) groups.
atomicCigarPattern = r'([0-9]+)([MIDNSHP])'
atomicCigarSearch = re.compile(atomicCigarPattern)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 91 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def extractCigarOps(cigar,flag):
    """
    Split a CIGAR string into a list of cigarOp objects.

    cigar -- CIGAR string; "*" yields an empty list
    flag  -- alignment flag; when bit 0x0010 is set the list is reversed
             (done in reverse order because negative strand)
    """
    if cigar == "*":
        return []
    # One pass is enough: each match is already a (length, op) pair.
    cigarOps = [cigarOp(opLength, op)
                for opLength, op in atomicCigarSearch.findall(cigar)]
    if flag & 0x0010:
        cigarOps.reverse()
    return cigarOps
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 119 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def calcQueryPosFromCigar(cigarOps):
    """
    Derive query start, query end and query length from a list of CIGAR ops.

    H and S ops seen before the first M or I op shift both the start and
    the end position; later H and S ops only add to the length.  M and I
    ops extend the end position and the length.  All other ops are ignored.

    Returns a queryPos built from (qsPos, qePos, qLen).
    """
    qsPos = 0
    qePos = 0
    qLen = 0
    # False until the first M/I op: clips before it are leading clips, so
    # consecutive leading clips (e.g. H then S) all shift the start.
    seenAligned = False
    for cigar in cigarOps:
        if cigar.op == 'H' or cigar.op == 'S':
            if not seenAligned:
                qsPos += cigar.length
                qePos += cigar.length
            qLen += cigar.length
        elif cigar.op == 'M' or cigar.op == 'I':
            qePos += cigar.length
            qLen += cigar.length
            seenAligned = True
    return queryPos(qsPos, qePos, qLen)
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 140 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
class cigarOp (object):
    """
    Struct holding a single CIGAR operation: its length and its op code.

    opLength is coerced to int; op is stored exactly as given.
    """
    def __init__(self, opLength, op):
        # int() is evaluated before either attribute is bound.
        self.length, self.op = int(opLength), op
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 148 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
class queryPos (object):
    """
    Struct holding a query start position, a query end position and a
    query length. Each value is coerced to int when the object is built.
    """
    def __init__(self, qsPos, qePos, qLen):
        # Assign in the fixed order qsPos, qePos, qLen, converting each to int.
        fields = (("qsPos", qsPos), ("qePos", qePos), ("qLen", qLen))
        for attr_name, raw_value in fields:
            setattr(self, attr_name, int(raw_value))
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 157 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 158 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def calcQueryOverlap(s1,e1,s2,e2):
    """
    Return the number of positions shared by the intervals [s1, e1] and
    [s2, e2], treating both ends as inclusive (hence the +1).
    Returns 0 when the intervals do not overlap.
    """
    shared_end = min(e1, e2)
    shared_start = max(s1, s2)
    span = 1 + shared_end - shared_start
    # A zero or negative span means the intervals are disjoint.
    return span if span > 0 else 0
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 162 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 163 ############################################### | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 164 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
class Usage(Exception):
    """
    Exception carrying a usage / error message.

    The message is stored on ``msg`` and is also handed to ``Exception`` so
    that ``str(exc)`` and ``exc.args`` contain it instead of being empty.
    """
    def __init__(self, msg):
        # Two-argument super() keeps this valid on both Python 2 and Python 3.
        super(Usage, self).__init__(msg)
        self.msg = msg
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
changeset | 168 | 
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
def main():
    """
    Parse the command line and run the split-read extraction.

    Options are read from sys.argv with optparse. When no input file is
    given (-i/--inFile), the help text followed by a blank line is printed
    and nothing else is done.

    Returns:
        None when help was printed or the extraction finished; 1 when the
        extraction raised an IOError, so that ``sys.exit(main())`` reports a
        non-zero exit status instead of signalling success after a failure.
    """
    usage = """%prog -i <file>

extractSplitReads_BwaMem v0.1.0
Author: Ira Hall
Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.
Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
    """
    parser = OptionParser(usage)

    parser.add_option("-i", "--inFile", dest="inFile",
        help="A SAM file or standard input (-i stdin).",
        metavar="FILE")
    parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type="int",
        help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",
        metavar="INT")
    parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true", default=0,
        help="Include alignments marked as duplicates. Default=False")
    parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type="int",
        help="minimum non-overlap between split alignments on the query (default=20)",
        metavar="INT")
    (opts, args) = parser.parse_args()

    if opts.inFile is None:
        parser.print_help()
        # Explicit write: a bare `print` only emits the blank line on
        # Python 2 and is a silent no-op expression on Python 3.
        sys.stdout.write("\n")
        return None

    try:
        extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)
    except IOError as err:
        sys.stderr.write("IOError " + str(err) + "\n")
        # Non-zero so the caller's sys.exit() reports the failure.
        return 1
    return None
| 
8b3daa745d9b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy commit c0bfc4b2215705e1b5fd1d4e60b1d72e5da13c92
 drosofff parents: diff
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status (None is treated as success by sys.exit).
    exit_status = main()
    sys.exit(exit_status)
