annotate commons/core/parsing/BlatToGffForBesPaired.py @ 36:44d5973c188c

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 15:02:29 -0400
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # http://www.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 import optparse
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 import re
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 import datetime
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from commons.core.parsing.BlatParser import BlatParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.seq.FastaUtils import FastaUtils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class BlatToGffForBesPaired(object):
    """Convert a BLAT result file of paired BAC-End Sequences (BES) to GFF3.

    The BLAT file is consumed two hit lines at a time. Each pair produces
    two BES lines and one BAC line spanning both BES; the three lines are
    appended to the output file only when the BAC line could be built.
    """

    # Number of header lines skipped at the top of the BLAT file.
    _NB_HEADER_LINES = 5

    def __init__(self):
        # All attributes are filled in later by setAttributesFromCmdLine()
        # and checkOptions(); they are declared here so that the object has
        # a well-defined state before those calls.
        self._options = None
        self._inputFileBlat = None
        self._inputFileFasta = None
        self._outputFileGFF = None
        self._methodName = None

    def setAttributesFromCmdLine(self):
        """Parse the command line and keep the parsed options on the object."""
        usage = (
            '\nThis Script Launch BlatToGffForBesPaired.\n\n'
            'Example 1: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3\n'
            'Example 2: python BlatToGffForBesPaired.py -i blatResultsFile.tab -f besSequences.fasta -o outputFile.gff3 -n muscadine:filtre1\n\n'
            'Note 1: In blat input file, all BAC-Ends must be paired. In addition, they must be one above the other.\nFor example, if you have the BES MRRE1H032F08FM1 (forward), we must have the BES MRRE1H032F08RM1 (reverse) just after, like:\n'
            '554\t26\t0\t0\t1\t16\t1\t17\t+\tMRRE1H032F08FM1\t606\t10\t606\tchr11\t19818926\t3725876\t3726473\t2\t553,27,\t10,579,\t3725876,3726446,\n'
            '620\t23\t0\t0\t0\t0\t0\t0\t-\tMRRE1H032F08RM1\t643\t0\t643\tchr11\t19818926\t3794984\t3795627\t1\t643,\t0,\t3794984,\n'
            'Note 2: the header in Blat results output file must be present (5 lines).\n\n'
        )

        # The version string used to name another script (copy-paste slip).
        parser = optparse.OptionParser(usage=usage, version="BlatToGffForBesPaired.py v1.0")
        parser.add_option('-i', '--input', dest='inputBLAT', help='Blat Input File Name, with BES paired (1 Forward and 1 Reverse) [Format: tabular]', default=None)
        parser.add_option('-f', '--fasta', dest='inputFASTA', help='Fasta Input File Name, with all sequences of BES [Format: fasta]', default=None)
        parser.add_option('-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default=None)
        parser.add_option('-n', '--methodname', dest='methodName', help='Method name in col. 3 [Default: None]', default=None)
        options, _ = parser.parse_args()
        self._options = options

    def checkOptions(self):
        """Validate the parsed options and copy them to the working attributes.

        Raises:
            Exception: when -i, -f or -o is missing, or when the BLAT or
                FASTA input file does not exist.
        """
        options = self._options

        # The option defaults are None, so test for "missing" with
        # truthiness: comparing to '' alone lets None through.
        if not options.inputBLAT:
            raise Exception("ERROR: No Blat file specified for -i !")
        if not os.path.exists(options.inputBLAT):
            raise Exception("ERROR: Blat Input File doesn't exist !")
        self._inputFileBlat = options.inputBLAT

        if not options.inputFASTA:
            raise Exception("ERROR: No Fasta file specified for -f !")
        if not os.path.exists(options.inputFASTA):
            raise Exception("ERROR: Fasta Input File doesn't exist !")
        self._inputFileFasta = options.inputFASTA

        if not options.output:
            raise Exception("ERROR: No Output file specified for -o !")
        self._outputFileGFF = options.output

        self._methodName = options.methodName

    def run(self):
        """Check the options, then convert the BLAT file pair by pair."""
        self.checkOptions()
        self._createGFFOutputFile()

        with open(self._inputFileBlat, 'r') as blatFile:
            for _ in range(self._NB_HEADER_LINES):
                blatFile.readline()

            numberLine = self._NB_HEADER_LINES + 1
            blatLine = blatFile.readline()
            while blatLine != '':
                gffLineBes1, besName1, seqBes1, typeBes1 = self.convertBlatObjectToGffLine(blatLine, numberLine)

                blatLine = blatFile.readline()
                numberLine = numberLine + 1
                if blatLine == '':
                    # Odd number of hit lines: the last BES has no mate, so
                    # there is nothing to pair it with.
                    sys.stderr.write("WARNING: Line %s, the Bes (%s) is not followed by its paired Bes: line ignored.\n" % (numberLine - 1, besName1))
                    break

                gffLineBes2, besName2, seqBes2, typeBes2 = self.convertBlatObjectToGffLine(blatLine, numberLine)
                gffLineBac = self.createGffLineForBac(gffLineBes1, besName1, seqBes1, typeBes1, gffLineBes2, besName2, seqBes2, typeBes2, numberLine)

                # A pair is written only when its BAC line could be built.
                if gffLineBac is not None:
                    self._printGFFLinesToOutputFile([gffLineBes1, gffLineBes2, gffLineBac])

                blatLine = blatFile.readline()
                numberLine = numberLine + 1

    def convertBlatObjectToGffLine(self, blatLine, numberLine):
        """Build the GFF3 line of one BES from one BLAT hit line.

        Args:
            blatLine: one hit line of the BLAT file.
            numberLine: line number of blatLine, used in warnings.

        Returns:
            A tuple (gffLine, besName, seqBes, typeBes) where typeBes is
            'FM', 'RM' or '' when the name ends with neither FM<digit> nor
            RM<digit>.
        """
        iBlatHit = BlatParser()
        iBlatHit.setAttributesFromString(blatLine, numberLine)
        besName = iBlatHit.getQName()
        seqBes = self.extractBesSequenceFromFastaFile(besName, numberLine)

        typeBes = ''
        if re.match(r'^.+FM[0-9]$', besName):
            typeBes = 'FM'
        elif re.match(r'^.+RM[0-9]$', besName):
            typeBes = 'RM'

        # NOTE(review): the target start is copied as-is and the strand is
        # always '+'; confirm this is intended for the BLAT coordinates.
        tStart = iBlatHit.getTStart()
        tEnd = iBlatHit.getTEnd()
        attributes = 'ID=%s;Name=%s;bes_start=%s;bes_end=%s;bes_size=%s;muscadine_seq=%s' % (besName, besName, tStart, tEnd, iBlatHit.getTSize(), seqBes)
        gffLine = self._formatGffLine(iBlatHit.getTName(), self._getFeatureType('BES'), tStart, tEnd, '+', attributes)
        return gffLine, besName, seqBes, typeBes

    def createGffLineForBac(self, gffLineBes1, besName1, seqBes1, typeBes1, gffLineBes2, besName2, seqBes2, typeBes2, numberLine):
        """Build the GFF3 line of the BAC delimited by two paired BES.

        The BES names and positions are read back from the two BES GFF
        lines (the besName1/besName2 arguments are overridden by the IDs
        found in column 9).

        Returns:
            The BAC GFF3 line, or None when the two BES do not share the
            same BAC name, are not on the same sequence, overlap, or are
            not one forward (FM) and one reverse (RM) BES.
        """
        lGffLineBes1 = gffLineBes1.split('\t')
        lGffLineBes2 = gffLineBes2.split('\t')
        besName1 = self.getBesName(lGffLineBes1[8])
        besName2 = self.getBesName(lGffLineBes2[8])

        tBes1 = (lGffLineBes1[0], int(lGffLineBes1[3]), int(lGffLineBes1[4]))
        tBes2 = (lGffLineBes2[0], int(lGffLineBes2[3]), int(lGffLineBes2[4]))

        if not (self.checkBesNames(besName1, besName2, numberLine) and self.checkBesPositions(tBes1, tBes2)):
            return None

        tFmAndRm = self.getBesFmAndRmNamesAndSequences(besName1, seqBes1, typeBes1, besName2, seqBes2, typeBes2)
        if tFmAndRm is None:
            # Unpacking None used to crash the whole run; skip the pair.
            sys.stderr.write("WARNING: Lines %s and %s the two Bes (%s AND %s) are not one forward (FM) and one reverse (RM) Bes: pair ignored.\n" % (int(numberLine) - 1, numberLine, besName1, besName2))
            return None
        nameBesFM, seqBesFM, nameBesRM, seqBesRM = tFmAndRm

        startBacPos, endBacPos = self.getBacPositions(tBes1, tBes2)
        sizeBacPos = endBacPos - startBacPos + 1
        bacName = self.getBacName(besName1)

        attributes = 'ID=%s;Name=%s;bac_start=%s;bac_end=%s;bac_size=%s;besFM_name=%s;muscadine_besFM_seq=%s;besRM_name=%s;muscadine_besRM_seq=%s' % (bacName, bacName, startBacPos, endBacPos, sizeBacPos, nameBesFM, seqBesFM, nameBesRM, seqBesRM)
        return self._formatGffLine(lGffLineBes1[0], self._getFeatureType('BAC'), startBacPos, endBacPos, '.', attributes)

    def getBesFmAndRmNamesAndSequences(self, besName1, seqBes1, typeBes1, besName2, seqBes2, typeBes2):
        """Order the two BES as (FM name, FM seq, RM name, RM seq).

        Returns None when the pair is not exactly one 'FM' and one 'RM'.
        """
        if typeBes1 == 'FM' and typeBes2 == 'RM':
            return besName1, seqBes1, besName2, seqBes2
        if typeBes1 == 'RM' and typeBes2 == 'FM':
            return besName2, seqBes2, besName1, seqBes1
        return None

    def getBesName(self, col9):
        """Return the BES name stored in the leading 'ID=' attribute of col9."""
        idAttribute = col9.split(';')[0]
        return idAttribute[len('ID='):]

    def getBacName(self, besName):
        """Return the BAC name: the BES name without its last 3 characters."""
        return besName[:-3]

    def checkBesNames(self, besName1, besName2, line):
        """Tell whether both BES belong to the same BAC; warn when not."""
        if self.getBacName(besName1) == self.getBacName(besName2):
            return True
        sys.stderr.write("WARNING: Lines %s and %s the two Bes (%s AND %s) do not belong to the same BAC !!!\n -> you have to filter this Blat file...\n" % (int(line)-1, line, besName1, besName2))
        return False

    def checkBesPositions(self, tBes1, tBes2):
        """Tell whether two BES lie on the same sequence without overlapping.

        Each argument is a (sequence name, start, end) tuple.
        """
        if tBes1[0] != tBes2[0]:
            return False
        minBes1, maxBes1 = min(tBes1[1:3]), max(tBes1[1:3])
        minBes2, maxBes2 = min(tBes2[1:3]), max(tBes2[1:3])
        # One BES must end strictly before the other one starts.
        return maxBes1 < minBes2 or maxBes2 < minBes1

    def getBacPositions(self, tBes1, tBes2):
        """Return (start, end) of the BAC delimited by the two BES.

        Each argument is a (sequence name, start, end) tuple.
        """
        minBes1, maxBes1 = min(tBes1[1:3]), max(tBes1[1:3])
        minBes2, maxBes2 = min(tBes2[1:3]), max(tBes2[1:3])
        if minBes1 < minBes2:
            return minBes1, maxBes2
        return minBes2, maxBes1

    def extractBesSequenceFromFastaFile(self, besName, numberLine):
        """Return the sequence of besName found in the FASTA input file.

        The matching records are extracted to a temporary file in the
        current directory, which is always removed afterwards.

        Returns:
            The concatenated non-header lines of the extracted file, or
            'NA' (with a warning on stderr) when nothing was extracted.
        """
        timestamp = datetime.datetime.now().strftime("%d%m%Y_%H%M%S")
        # The pid keeps two runs started within the same second apart.
        tmpFileName = 'tmp_BlatToGffForBesPaired_%s_%s.fasta' % (timestamp, os.getpid())

        lFastaLines = []
        try:
            iFastaUtils = FastaUtils()
            iFastaUtils.dbExtractByPattern(besName, self._inputFileFasta, tmpFileName)
            if os.path.exists(tmpFileName):
                with open(tmpFileName, 'r') as newFastaFile:
                    lFastaLines = newFastaFile.readlines()
        finally:
            if os.path.exists(tmpFileName):
                os.remove(tmpFileName)

        if lFastaLines:
            return ''.join([line.replace('\n', '') for line in lFastaLines if line[0] != '>'])

        sys.stderr.write("WARNING: At line %s, the BAC-Ends (%s) hasn't got sequence in fasta file (%s) !!\n" % (numberLine, besName, os.path.basename(self._inputFileFasta)))
        return 'NA'

    def _getFeatureType(self, featureName):
        """Return the GFF column 3 value, prefixed by the method name if any."""
        if not self._methodName:
            return featureName
        return '%s:%s' % (self._methodName, featureName)

    def _formatGffLine(self, seqName, featureType, start, end, strand, attributes):
        """Join the nine GFF3 columns; score and phase are always '.'."""
        return '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (seqName, 'BlatToGffForBesPaired', featureType, start, end, '.', strand, '.', attributes)

    def _createGFFOutputFile(self):
        """Create (or truncate) the output file and write the GFF3 header."""
        with open(self._outputFileGFF, 'w') as GFFfile:
            GFFfile.write("##gff-version 3\n")

    def _printGFFLinesToOutputFile(self, lLines):
        """Append the given GFF lines to the output file."""
        with open(self._outputFileGFF, 'a') as GFFfile:
            GFFfile.writelines(lLines)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
262
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: read the command-line options, then convert the
    # BLAT file to GFF3.
    iConverter = BlatToGffForBesPaired()
    iConverter.setAttributesFromCmdLine()
    iConverter.run()