#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2010
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
#
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
#
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
|  | 31 """ | 
|  | 32 Read a mapping file (many formats supported) and select some of them | 
|  | 33 Mappings should be sorted by read names | 
|  | 34 """ | 
import os, random, shelve
from optparse import OptionParser, OptionGroup
from commons.core.parsing.ParserChooser import ParserChooser
from commons.core.parsing.FastaParser import FastaParser
from commons.core.parsing.FastqParser import FastqParser
from commons.core.parsing.GffParser import GffParser
from commons.core.writer.BedWriter import BedWriter
from commons.core.writer.UcscWriter import UcscWriter
from commons.core.writer.GbWriter import GbWriter
from commons.core.writer.Gff2Writer import Gff2Writer
from commons.core.writer.Gff3Writer import Gff3Writer
from commons.core.writer.FastaWriter import FastaWriter
from commons.core.writer.FastqWriter import FastqWriter
from commons.core.writer.MySqlTranscriptWriter import MySqlTranscriptWriter
from SMART.Java.Python.mySql.MySqlConnection import MySqlConnection
from SMART.Java.Python.mySql.MySqlTable import MySqlTable
from SMART.Java.Python.misc.RPlotter import RPlotter
from SMART.Java.Python.misc.Progress import Progress
from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress


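# Module-wide thresholds: introns shorter than distanceExons positions may be
# merged away (see mergeExons), and mappings containing an exon shorter than
# exonSize positions can be rejected (see checkErrors).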
distanceExons = 20
exonSize      = 20


class MapperAnalyzer(object):
    """
    Analyse the output of a mapping parser and select the mappings to keep
    """

    def __init__(self, verbosity = 0):
        self.verbosity                = verbosity
        self.mySqlConnection          = MySqlConnection(verbosity)
        self.tooShort                 = 0
        self.tooManyMismatches        = 0
        self.tooManyGaps              = 0
        self.tooShortExons            = 0
        self.tooManyMappings          = 0
        self.nbMappings               = 0
        self.nbSequences              = 0
        self.nbAlreadyMapped          = 0
        self.nbAlreadyMappedSequences = 0
        self.nbWrittenMappings        = 0
        self.nbWrittenSequences       = 0
        self.parser                   = None
        self.logHandle                = None
        self.randomNumber             = random.randint(0, 100000)
        self.gff3Writer               = None
        self.alreadyMappedReader      = None
        self.unmatchedWriter          = None
        self.sequenceListParser       = None
        self.sequences                = None
        self.alreadyMapped            = None
        self.mappedNamesTable         = None
        self.minSize                  = None
        self.minId                    = None
        self.maxMismatches            = None
        self.maxGaps                  = None
        self.maxMappings              = None
        self.merge                    = False
        self.checkExons               = False
        self.suffix                   = None
        self.tmpDirectory             = "%s%s" % (os.environ["SMARTMPPATH"], os.sep) if "SMARTMPPATH" in os.environ else ""


    def __del__(self):
        if self.sequences != None:
            self.sequences.close()
        if self.alreadyMapped != None:
            self.alreadyMapped.close()
        if self.mappedNamesTable != None:
            self.mappedNamesTable.remove()
        if self.gff3Writer != None:
            self.gff3Writer.close()

        if self.logHandle != None:
            self.logHandle.close()


    def setMappingFile(self, fileName, format):
        parserChooser = ParserChooser(self.verbosity)
        parserChooser.findFormat(format, "mapping")
        self.parser = parserChooser.getParser(fileName)


    def setSequenceFile(self, fileName, format):
        if format == "fasta":
            self.sequenceListParser = FastaParser(fileName, self.verbosity)
        elif format == "fastq":
            self.sequenceListParser = FastqParser(fileName, self.verbosity)
        else:
            raise Exception("Do not understand sequence format %s" % (format))


    def setOutputFile(self, fileName, title):
        self.gff3Writer = Gff3Writer(fileName, self.verbosity)
        self.gff3Writer.setTitle(title)


    def setAlreadyMatched(self, fileName):
        self.alreadyMappedReader = GffParser(fileName, self.verbosity)


    def setRemainingFile(self, fileName, format):
        if format == "fasta":
            self.unmatchedWriter = FastaWriter("%s_unmatched.fasta" % (fileName), self.verbosity)
        elif format == "fastq":
            self.unmatchedWriter = FastqWriter("%s_unmatched.fastq" % (fileName), self.verbosity)
        else:
            raise Exception("Do not understand %s format." % (format))
        self.mappedNamesTable = MySqlTable(self.mySqlConnection, "mappedNames_%d" % (self.randomNumber), self.verbosity)
        self.mappedNamesTable.create(["name"], {"name": "char"}, {"name": 50})
        self.mappedNamesTable.createIndex("iNameMapped", ["name", ], True)


    def setLog(self, fileName):
        self.logHandle = open(fileName, "w")


    def setMinSize(self, size):
        self.minSize = size


    def setMinId(self, id):
        self.minId = id


    def setMaxMismatches(self, mismatches):
        self.maxMismatches = mismatches


    def setMaxGaps(self, gaps):
        self.maxGaps = gaps


    def setMaxMappings(self, mappings):
        self.maxMappings = mappings


    def mergeExons(self, b):
        self.merge = b


    def acceptShortExons(self, b):
        self.checkExons = not b


    def countMappings(self):
        self.nbMappings = self.parser.getNbMappings()
        if self.verbosity > 0:
            print "%i matches found" % (self.nbMappings)


    def storeAlreadyMapped(self):
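        """Record, in an on-disk shelve, the names of the reads already present in the GFF3 file given with -a/--append."""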
        self.alreadyMapped            = shelve.open("%stmpAlreadyMapped_%d" % (self.tmpDirectory, self.randomNumber))
        progress                      = Progress(self.alreadyMappedReader.getNbTranscripts(), "Reading already mapped reads", self.verbosity)
        self.nbAlreadyMappedSequences = 0
        for transcript in self.alreadyMappedReader.getIterator():
            if not self.alreadyMapped.has_key(transcript.getName()):
                self.alreadyMapped[transcript.getName()] = 1
                self.nbAlreadyMappedSequences           += 1
            progress.inc()
        progress.done()
        self.nbAlreadyMapped = self.alreadyMappedReader.getNbTranscripts()


    def storeSequences(self):
        self.sequences = shelve.open("%stmpSequences_%d" % (self.tmpDirectory, self.randomNumber))
        progress       = Progress(self.sequenceListParser.getNbSequences(), "Reading sequences", self.verbosity)
        for sequence in self.sequenceListParser.getIterator():
            self.sequences[sequence.getName().split(" ")[0]] = len(sequence.getSequence())
            self.nbSequences += 1
            progress.inc()
        progress.done()
        if self.verbosity > 0:
            print "%i sequences read" % (self.nbSequences)


    def checkOrder(self):
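        """Check that the mapping file is grouped by read name; raise an exception if a name re-appears after a different name has been seen."""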
        names        = shelve.open("%stmpNames_%d" % (self.tmpDirectory, self.randomNumber))
        previousName = None
        progress = Progress(self.nbMappings, "Checking mapping file", self.verbosity)
        for mapping in self.parser.getIterator():
            name = mapping.queryInterval.getName()
            if name != previousName and previousName != None:
                if names.has_key(previousName):
                    raise Exception("Error! Input mapping file is not ordered! (Name '%s' occurs at least twice)" % (previousName))
                names[previousName] = 1
            # keep track of the last name seen, whether or not it changed
            previousName = name
            progress.inc()
        progress.done()
        names.close()


    def checkPreviouslyMapped(self, name):
        if self.alreadyMappedReader == None:
            return False
        return self.alreadyMapped.has_key(name)


    def findOriginalSize(self, name):
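        """Return the length of the original read, looking the name up as-is or with a '/1' suffix."""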
        alternate    = "%s/1" % (name)
        if (self.suffix == None) or (not self.suffix):
            if self.sequences.has_key(name):
                self.suffix = False
                return self.sequences[name]
            if self.suffix == None:
                self.suffix = True
            else:
                raise Exception("Cannot find name %s" % (name))
        if (self.suffix):
            if self.sequences.has_key(alternate):
                return self.sequences[alternate]
        raise Exception("Cannot find name %s" % (name))


    def checkErrors(self, mapping):
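        """Return True if the mapping passes the size, identity, mismatch, gap and short-exon filters; otherwise count and optionally log the reason."""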
        accepted = True
        # short size
        if self.minSize != None and mapping.size * 100 < self.minSize * mapping.queryInterval.size:
            self.tooShort += 1
            accepted    = False
            if self.logHandle != None:
                self.logHandle.write("size of mapping %s is too short (%i mapped out of a read of size %i)\n" % (str(mapping), mapping.size, mapping.queryInterval.size))
        # low identity
        if self.minId != None and mapping.getTagValue("identity") < self.minId:
            self.tooManyMismatches += 1
            accepted                = False
            if self.logHandle != None:
                self.logHandle.write("mapping %s has a low identity rate\n" % (str(mapping)))
        # too many mismatches
        if self.maxMismatches != None and mapping.getTagValue("nbMismatches") > self.maxMismatches:
            self.tooManyMismatches += 1
            accepted                = False
            if self.logHandle != None:
                self.logHandle.write("mapping %s has more mismatches than %i\n" % (str(mapping), self.maxMismatches))
        # too many gaps
        if self.maxGaps != None and mapping.getTagValue("nbGaps") > self.maxGaps:
            self.tooManyGaps += 1
            accepted         = False
            if self.logHandle != None:
                self.logHandle.write("mapping %s has more gaps than %i\n" % (str(mapping), self.maxGaps))
        # short exons
        if self.checkExons and len(mapping.subMappings) > 1 and min([subMapping.targetInterval.getSize() for subMapping in mapping.subMappings]) < exonSize:
            self.tooShortExons += 1
            accepted            = False
            if self.logHandle != None:
                self.logHandle.write("sequence %s maps as too short exons\n" % (str(mapping)))
        return accepted


    def checkNbMappings(self, mappings):
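        """Return True if the read has at least one mapping and no more than maxMappings occurrences (using the nbOccurrences tag when present)."""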
        nbOccurrences = 0
        for mapping in mappings:
            nbOccurrences += 1 if "nbOccurrences" not in mapping.getTagNames() else mapping.getTagValue("nbOccurrences")
        if (self.maxMappings != None and nbOccurrences > self.maxMappings):
            self.tooManyMappings += 1
            if self.logHandle != None:
                self.logHandle.write("sequence %s maps %i times\n" % (mappings[0].queryInterval.getName(), nbOccurrences))
            return False
        return (nbOccurrences > 0)


    def sortMappings(self, mappings):
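        """Sort the mappings of a read by increasing error score, number them, mark equal-score mappings as ties and point secondary mappings to the best region."""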
        nbOccurrences = 0
        for mapping in mappings:
            nbOccurrences += 1 if "nbOccurrences" not in mapping.getTagNames() else mapping.getTagValue("nbOccurrences")

        orderedMappings = sorted(mappings, key = lambda mapping: mapping.getErrorScore())
        cpt             = 1
        rank            = 1
        previousMapping = None
        previousScore   = None
        wasLastTie      = False
        rankedMappings  = []
        bestRegion      = "%s:%d-%d" % (orderedMappings[0].targetInterval.getChromosome(), orderedMappings[0].targetInterval.getStart(), orderedMappings[0].targetInterval.getEnd())
        for mapping in orderedMappings:
            mapping.setNbOccurrences(nbOccurrences)
            mapping.setOccurrence(cpt)

            score = mapping.getErrorScore()
            if previousScore != None and previousScore == score:
                if "Rank" in previousMapping.getTagNames():
                    if not wasLastTie:
                        previousMapping.setRank("%sTie" % (rank))
                    mapping.setRank("%sTie" % (rank))
                    wasLastTie = True
            else:
                rank = cpt
                mapping.setRank(rank)
                wasLastTie = False
            if cpt != 1:
                mapping.setBestRegion(bestRegion)

            rankedMappings.append(mapping)
            previousMapping = mapping
            previousScore   = score
            cpt            += 1
        return rankedMappings


    def processMappings(self, mappings):
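        """Filter the mappings of one read and, if the read is kept, rank them and write them to the GFF3 output; record the read name when an unmatched file is requested."""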
        if not mappings:
            return
        selectedMappings = []
        name             = mappings[0].queryInterval.getName()
        size             = self.findOriginalSize(name)
        for mapping in mappings:
            if self.merge:
                mapping.mergeExons(distanceExons)
            mapping.queryInterval.size = size
            if self.checkErrors(mapping):
                selectedMappings.append(mapping)

        if self.checkNbMappings(selectedMappings):
            if self.unmatchedWriter != None:
                query = self.mySqlConnection.executeQuery("INSERT INTO %s (name) VALUES ('%s')" % (self.mappedNamesTable.name, name if not self.suffix else "%s/1" % (name)))
            self.nbWrittenSequences += 1
            mappings = self.sortMappings(selectedMappings)
            for mapping in mappings:
                self.nbWrittenMappings += 1
                self.gff3Writer.addTranscript(mapping.getTranscript())


    def readMappings(self):
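        """Iterate over the mapping file, group consecutive mappings by read name, skip reads that were already mapped, and process each group."""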
        previousQueryName = None
        mappings          = []
        self.parser.reset()
        progress = Progress(self.nbMappings, "Reading mappings", self.verbosity)
        for mapping in self.parser.getIterator():
            queryName = mapping.queryInterval.getName().split(" ")[0]
            if self.checkPreviouslyMapped(queryName):
                if self.logHandle != None:
                    self.logHandle.write("Mapping %s has already been mapped.\n" % (queryName))
            else:
                if previousQueryName == queryName:
                    mappings.append(mapping)
                else:
                    if previousQueryName != None:
                        self.processMappings(mappings)
                    previousQueryName = queryName
                    mappings          = [mapping, ]
            progress.inc()
        self.processMappings(mappings)
        self.gff3Writer.write()
        self.gff3Writer.close()
        progress.done()


    def writeUnmatched(self):
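        """Write to the unmatched file every input sequence whose name was never recorded as mapped."""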
        progress = Progress(self.nbSequences, "Reading unmatched sequences", self.verbosity)
        for sequence in self.sequenceListParser.getIterator():
            name = sequence.getName().split(" ")[0]
            query = self.mySqlConnection.executeQuery("SELECT * FROM %s WHERE name = '%s' LIMIT 1" % (self.mappedNamesTable.name, name))
            if query.isEmpty():
                self.unmatchedWriter.addSequence(sequence)
            progress.inc()
        progress.done()


    def analyze(self):
        self.countMappings()
        self.checkOrder()
        self.storeSequences()
        if self.alreadyMappedReader != None:
            self.storeAlreadyMapped()
        self.readMappings()
        if self.unmatchedWriter != None:
            self.writeUnmatched()




if __name__ == "__main__":

    # parse command line
    description = "Mapper Analyzer v1.0.1: Read the output of an aligner, print statistics and possibly translate into BED or GBrowse formats. [Category: Conversion]"

    parser = OptionParser(description = description)
    compGroup = OptionGroup(parser, "Compulsory options")
    filtGroup = OptionGroup(parser, "Filtering options")
    tranGroup = OptionGroup(parser, "Transformation options")
    outpGroup = OptionGroup(parser, "Output options")
    otheGroup = OptionGroup(parser, "Other options")
    compGroup.add_option("-i", "--input",            dest="inputFileName",     action="store",                        type="string", help="input file (output of the tool) [compulsory] [format: file in mapping format given by -f]")
    compGroup.add_option("-f", "--format",           dest="format",            action="store",      default="seqmap", type="string", help="format of the file [compulsory] [format: mapping file format]")
    compGroup.add_option("-q", "--sequences",        dest="sequencesFileName", action="store",                        type="string", help="file of the sequences [compulsory] [format: file in sequence format given by -k]")
    compGroup.add_option("-k", "--seqFormat",        dest="sequenceFormat",    action="store",      default="fasta",  type="string", help="format of the sequences: fasta or fastq [default: fasta] [format: sequence file format]")
    compGroup.add_option("-o", "--output",           dest="outputFileName",    action="store",                        type="string", help="output file [compulsory] [format: output file in GFF3 format]")
    filtGroup.add_option("-n", "--number",           dest="number",            action="store",      default=None,     type="int",    help="max. number of occurrences of a sequence [format: int]")
    filtGroup.add_option("-s", "--size",             dest="size",              action="store",      default=None,     type="int",    help="minimum percentage of size [format: int]")
    filtGroup.add_option("-d", "--identity",         dest="identity",          action="store",      default=None,     type="int",    help="minimum percentage of identity [format: int]")
    filtGroup.add_option("-m", "--mismatch",         dest="mismatch",          action="store",      default=None,     type="int",    help="maximum number of mismatches [format: int]")
    filtGroup.add_option("-p", "--gap",              dest="gap",               action="store",      default=None,     type="int",    help="maximum number of gaps [format: int]")
    tranGroup.add_option("-e", "--mergeExons",       dest="mergeExons",        action="store_true", default=False,                   help="merge exons when introns are short [format: bool] [default: false]")
    tranGroup.add_option("-x", "--removeExons",      dest="removeExons",       action="store_true", default=False,                   help="remove transcripts when exons are short [format: bool] [default: false]")
    outpGroup.add_option("-t", "--title",            dest="title",             action="store",      default="SMART",  type="string", help="title of the UCSC track [format: string] [default: SMART]")
    outpGroup.add_option("-r", "--remaining",        dest="remaining",         action="store_true", default=False,                   help="print the unmatched sequences [format: bool] [default: false]")
    otheGroup.add_option("-a", "--append",           dest="appendFileName",    action="store",      default=None,     type="string", help="append to GFF3 file [format: file in GFF3 format]")
    otheGroup.add_option("-v", "--verbosity",        dest="verbosity",         action="store",      default=1,        type="int",    help="trace level [default: 1] [format: int]")
    otheGroup.add_option("-l", "--log",              dest="log",               action="store_true", default=False,                   help="write a log file [format: bool] [default: false]")
    parser.add_option_group(compGroup)
    parser.add_option_group(filtGroup)
    parser.add_option_group(tranGroup)
    parser.add_option_group(outpGroup)
    parser.add_option_group(otheGroup)
    (options, args) = parser.parse_args()


    analyzer = MapperAnalyzer(options.verbosity)
    analyzer.setMappingFile(options.inputFileName, options.format)
    analyzer.setSequenceFile(options.sequencesFileName, options.sequenceFormat)
    analyzer.setOutputFile(options.outputFileName, options.title)
    if options.appendFileName != None:
        analyzer.setAlreadyMatched(options.appendFileName)
    if options.remaining:
        analyzer.setRemainingFile(options.outputFileName, options.sequenceFormat)
    if options.number != None:
        analyzer.setMaxMappings(options.number)
    if options.size != None:
        analyzer.setMinSize(options.size)
    if options.identity != None:
        analyzer.setMinId(options.identity)
    if options.mismatch != None:
        analyzer.setMaxMismatches(options.mismatch)
    if options.gap != None:
        analyzer.setMaxGaps(options.gap)
    if options.mergeExons:
        analyzer.mergeExons(True)
    if options.removeExons:
        analyzer.acceptShortExons(False)
    if options.log:
        analyzer.setLog("%s.log" % (options.outputFileName))
    analyzer.analyze()

    if options.verbosity > 0:
        print "kept %i sequences out of %i (%f%%)" % (analyzer.nbWrittenSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)
        if options.appendFileName != None:
            print "kept %i sequences out of %i (%f%%) including already mapped sequences" % (analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences, analyzer.nbSequences, float(analyzer.nbWrittenSequences + analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)
        print "kept %i mappings out of %i (%f%%)" % (analyzer.nbWrittenMappings, analyzer.nbMappings, float(analyzer.nbWrittenMappings) / analyzer.nbMappings * 100)
        if options.appendFileName != None:
            print "kept %i mappings out of %i (%f%%) including already mapped" % (analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped, analyzer.nbMappings, float(analyzer.nbWrittenMappings + analyzer.nbAlreadyMapped) / analyzer.nbMappings * 100)
        print "removed %i too short mappings (%f%%)" % (analyzer.tooShort, float(analyzer.tooShort) / analyzer.nbMappings * 100)
        print "removed %i mappings with too many mismatches (%f%%)" % (analyzer.tooManyMismatches, float(analyzer.tooManyMismatches) / analyzer.nbMappings * 100)
        print "removed %i mappings with too many gaps (%f%%)" % (analyzer.tooManyGaps, float(analyzer.tooManyGaps) / analyzer.nbMappings * 100)
        print "removed %i mappings with too short exons (%f%%)" % (analyzer.tooShortExons, float(analyzer.tooShortExons) / analyzer.nbMappings * 100)
        print "removed %i sequences with too many hits (%f%%)" % (analyzer.tooManyMappings, float(analyzer.tooManyMappings) / analyzer.nbSequences * 100)
        print "%i sequences have no mapping (%f%%)" % (analyzer.nbSequences - analyzer.nbWrittenSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences) / analyzer.nbSequences * 100)
        if options.appendFileName != None:
            print "%i sequences have no mapping (%f%%) excluding already mapped sequences" % (analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences, float(analyzer.nbSequences - analyzer.nbWrittenSequences - analyzer.nbAlreadyMappedSequences) / analyzer.nbSequences * 100)
