comparison commons/core/parsing/CrossSsrAndBesMappedByBlatToGff.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 import os
33 import optparse
34 from commons.core.parsing.SsrParser import SsrParser
35 from commons.core.parsing.BlatParser import BlatParser
36
class CrossSsrAndBesMappedByBlatToGff(object):
    """Write a GFF3 file of SSRs placed on the targets hit by their BES.

    Two tabular inputs are crossed on the BES name:

    * an SSR result file, parsed line by line with ``SsrParser``;
    * a Blat result file, parsed line by line with ``BlatParser``.

    For every Blat hit whose query name is also a BES name in the SSR file,
    one GFF3 line is written per SSR of that BES.
    """

    # Number of leading lines skipped in each input before data lines start.
    # NOTE(review): 5 presumably matches the header written by Blat in PSL
    # output and 1 a column-title line in the SSR file - confirm.
    _BLAT_HEADER_LINES = 5
    _SSR_HEADER_LINES = 1

    def __init__(self):
        self._inputFileSSR = ''
        self._inputFileBlat = ''
        self._outputFileGFF = ''
        # Defined here so that methods reading them never hit a missing
        # attribute when checkOptions()/setAttributesFromCmdLine() were not
        # called first.
        self._methodName = None
        self.options = None

    def setAttributesFromCmdLine(self, args=None):
        """Parse the command line and store the result in ``self.options``.

        @param args: list of argument strings to parse; when None (default)
                     optparse falls back to ``sys.argv[1:]``.
        """
        usage = (
            '\nThis Script Launch CrossSsrAndBesMappedByBlatToGff.\n\n'
            'Example 1: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3\n'
            'Example 2: python CrossSsrAndBesMappedByBlatToGff.py -s ssrResultsFile.tab -b blatResultsFile.tab -o outputFile.gff3 -n muscadine:filtre1\n\n'
        )

        # The version string used to name an unrelated script.
        parser = optparse.OptionParser(usage=usage, version="CrossSsrAndBesMappedByBlatToGff.py v1.0")
        parser.add_option('-s', '--ssr', dest='inputSSR', help='SSR Input File Name [Format: tabular]', default=None)
        parser.add_option('-b', '--blat', dest='inputBLAT', help='Blat Input File Name [Format: tabular]', default=None)
        parser.add_option('-o', '--output', dest='output', help='Output File Name [Format: GFF3]', default=None)
        parser.add_option('-n', '--methodName', dest='methodName', help='Method name in col. 3 [Default: None]', default=None)
        options, _positional = parser.parse_args(args)
        self.options = options

    def checkOptions(self):
        """Validate ``self.options`` and copy the values onto the instance.

        @raise Exception: when an input/output file name is missing (None or
                          empty string) or when an input file does not exist.
        """
        options = self.options

        # Options default to None, so test for "no value" rather than only
        # for the empty string.
        if not options.inputSSR:
            raise Exception("ERROR: No SSR file specified for -s !")
        if not os.path.exists(options.inputSSR):
            raise Exception("ERROR: SSR Input File doesn't exist !")
        self._inputFileSSR = options.inputSSR

        if not options.inputBLAT:
            raise Exception("ERROR: No Blat file specified for -b !")
        if not os.path.exists(options.inputBLAT):
            raise Exception("ERROR: Blat Input File doesn't exist !")
        self._inputFileBlat = options.inputBLAT

        if not options.output:
            raise Exception("ERROR: No Output file specified for -o !")
        self._outputFileGFF = options.output

        self._methodName = options.methodName

    def run(self):
        """Check the options, then stream the Blat file into the GFF3 output."""
        self.checkOptions()
        self._createGFFOutputFile()

        dictSsrParser = self.createDictOfSsrParser({})

        with open(self._inputFileBlat, 'r') as blatFile:
            for _ in range(self._BLAT_HEADER_LINES):
                blatFile.readline()

            numberLine = self._BLAT_HEADER_LINES + 1
            for blatLine in iter(blatFile.readline, ''):
                # As before, reading stops at the first empty line.
                if blatLine == '\n':
                    break

                thisBlatHit = BlatParser()
                thisBlatHit.setAttributesFromString(blatLine, numberLine)

                if thisBlatHit.getQName() in dictSsrParser:
                    lLinesToPrint = self.createListOfGFFLinesForThisBesWithSSR(thisBlatHit, dictSsrParser)
                    self._printGFFLinesToOutputFile(lLinesToPrint)

                numberLine += 1

    def createDictOfSsrParser(self, dictSsrParser=None):
        """Read the SSR file and group its records by BES name.

        @param dictSsrParser: ignored; kept only so that existing callers
                              passing a dictionary keep working. A new
                              dictionary is always built.
        @return: dictionary mapping each BES name to the list of SsrParser
                 objects read for it, in file order.
        """
        dictSsrParser = {}

        with open(self._inputFileSSR, 'r') as ssrFile:
            for _ in range(self._SSR_HEADER_LINES):
                ssrFile.readline()

            numberLine = self._SSR_HEADER_LINES + 1
            for line in iter(ssrFile.readline, ''):
                # As before, reading stops at the first empty line.
                if line == '\n':
                    break

                thisSSRHit = SsrParser()
                thisSSRHit.setAttributesFromString(line, numberLine)
                dictSsrParser.setdefault(thisSSRHit.getBesName(), []).append(thisSSRHit)

                numberLine += 1

        return dictSsrParser

    def createListOfGFFLinesForThisBesWithSSR(self, BlatHitObject, dictSsrParser):
        """Build the GFF3 lines of every SSR carried by the BES of a Blat hit.

        @param BlatHitObject: parsed Blat hit; its query name must be a key
                              of dictSsrParser.
        @param dictSsrParser: dictionary BES name -> list of parsed SSR
                              records, as built by createDictOfSsrParser().
        @return: list of newline-terminated, tab-separated GFF3 lines.
        @raise ValueError: when the strand of the hit is neither '+' nor '-'.
        """
        besNameToKeep = BlatHitObject.getQName()

        # Values that only depend on the Blat hit are read once.
        targetName = BlatHitObject.getTName()
        targetStart = BlatHitObject.getTStart()
        targetEnd = BlatHitObject.getTEnd()
        strand = BlatHitObject.getStrand()
        querySize = BlatHitObject.getQSize()
        queryStart = BlatHitObject.getQStart()
        queryEnd = BlatHitObject.getQEnd()

        if self._methodName:
            featureType = '%s:SSR' % self._methodName
        else:
            featureType = 'SSR'

        listGffLines = []
        for SSRHitObject in dictSsrParser[besNameToKeep]:
            ssrStart = SSRHitObject.getSsrStart()
            ssrEnd = SSRHitObject.getSsrEnd()
            ssrMotif = SSRHitObject.getSsrMotif()
            ssrMotifNumber = SSRHitObject.getSsrMotifNumber()
            besRedundancy = SSRHitObject.getBesRedundancy()

            posSSRStart = self.convertSSRPositionsToChromPositions(ssrStart, targetStart, targetEnd, strand)
            posSSREnd = self.convertSSRPositionsToChromPositions(ssrEnd, targetStart, targetEnd, strand)
            # On the '-' strand the converted start is larger than the
            # converted end; GFF3 requires start <= end.
            gffStart, gffEnd = sorted((posSSRStart, posSSREnd))
            ssrSeq = self.getSsrSeq(ssrMotif, ssrMotifNumber)

            attributes = 'ID=SSR_%s_%s;Name=SSR_%s_%s;bes_name=%s;bes_size=%s;bes_matchstart=%s;bes_matchend=%s;bes_redundancy=%s;ssr_type=%s;ssr_motif=%s;ssr_motif_number=%s;ssr_start=%s;ssr_end=%s;muscadine_seq=%s' % (
                besNameToKeep, besRedundancy,
                besNameToKeep, besRedundancy,
                besNameToKeep, querySize,
                queryStart, queryEnd,
                besRedundancy, SSRHitObject.getSsrNbNucleotides(),
                ssrMotif, ssrMotifNumber,
                ssrStart, ssrEnd, ssrSeq)
            gffLine = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                targetName, 'CrossSsrAndBesAlignedByBlat', featureType,
                gffStart, gffEnd, '.', strand, '.', attributes)
            listGffLines.append(gffLine)

        return listGffLines

    def convertSSRPositionsToChromPositions(self, ssrPos, chromPosStart, chromPosEnd, strand):
        """Convert a position counted inside a hit into a target position.

        Position 1 maps to chromPosStart on the '+' strand and to
        chromPosEnd on the '-' strand.

        @param ssrPos: position inside the hit (int or numeric string).
        @param chromPosStart: start of the hit on the target.
        @param chromPosEnd: end of the hit on the target.
        @param strand: '+' or '-'.
        @return: the converted position, as an int.
        @raise ValueError: when strand is neither '+' nor '-'.
        """
        if strand == '+':
            return int(chromPosStart) + int(ssrPos) - 1
        if strand == '-':
            return int(chromPosEnd) - int(ssrPos) + 1
        # Previously this fell through to an UnboundLocalError.
        raise ValueError("ERROR: unknown strand '%s' (expected '+' or '-') !" % strand)

    def getSsrSeq(self, motif, nbMotif):
        """Return the motif repeated nbMotif times (nbMotif is cast to int)."""
        return motif * int(nbMotif)

    def _createGFFOutputFile(self):
        """Create (or truncate) the output file and write the GFF3 header."""
        with open(self._outputFileGFF, 'w') as gffFile:
            gffFile.write("##gff-version 3\n")

    def _printGFFLinesToOutputFile(self, lLinesToPrint):
        """Append the given lines, unchanged, to the output file."""
        with open(self._outputFileGFF, 'a') as gffFile:
            gffFile.writelines(lLinesToPrint)
193
if __name__ == '__main__':
    # Script entry point: read the command-line options, then run the
    # SSR / Blat crossing and write the GFF3 output.
    launcher = CrossSsrAndBesMappedByBlatToGff()
    launcher.setAttributesFromCmdLine()
    launcher.run()