| 6 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 from optparse import OptionParser | 
|  | 4 from commons.core.parsing.FastaParser import FastaParser | 
|  | 5 from commons.core.writer.Gff3Writer import Gff3Writer | 
|  | 6 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer | 
|  | 7 from SMART.Java.Python.misc.Progress import Progress | 
|  | 8 from commons.core.utils.RepetOptionParser import RepetOptionParser | 
|  | 9 from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow | 
|  | 10 | 
|  | 11 | 
|  | 12 class CountReadGCPercent(object): | 
|  | 13 | 
|  | 14     def __init__(self): | 
|  | 15         self.referenceReader = None | 
|  | 16         self.gffReader = None | 
|  | 17         self.outputWriter = None | 
|  | 18         self.verbose = 0 | 
|  | 19 | 
|  | 20     def setInputReferenceFile(self, fileName): | 
|  | 21         self.referenceReader = fileName | 
|  | 22 | 
|  | 23     def setInputGffFile(self, fileName): | 
|  | 24         self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose) | 
|  | 25 | 
|  | 26     def setOutputFileName(self, fileName): | 
|  | 27         self.outputWriter = Gff3Writer(fileName, self.verbose) | 
|  | 28 | 
|  | 29     def readGffAnnotation(self): | 
|  | 30         self.coveredRegions = {} | 
|  | 31         progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose) | 
|  | 32         for transcript in self.gffReader.getIterator(): | 
|  | 33             chromosome = transcript.getChromosome() | 
|  | 34             if chromosome not in self.coveredRegions: | 
|  | 35                 self.coveredRegions[chromosome] = {} | 
|  | 36             for exon in transcript.getExons(): | 
|  | 37                 for position in range(exon.getStart(), exon.getEnd()+1): | 
|  | 38                     self.coveredRegions[chromosome][position] = 1 | 
|  | 39             progress.inc() | 
|  | 40         progress.done() | 
|  | 41 | 
|  | 42     def write(self): | 
|  | 43         iParser = FastaParser(self.referenceReader) | 
|  | 44         iParser.setTags() | 
|  | 45         iGetGCPercentBySW = CountGCPercentBySlidingWindow() | 
|  | 46         progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose) | 
|  | 47         for transcript in self.gffReader.getIterator(): | 
|  | 48             chromosome = transcript.getChromosome() | 
|  | 49             GCpercent = 0 | 
|  | 50             nPercent = 0 | 
|  | 51             for exon in transcript.getExons(): | 
|  | 52                     for sequenceName in iParser.getTags().keys(): | 
|  | 53                         if sequenceName != chromosome: | 
|  | 54                             continue | 
|  | 55                         else: | 
|  | 56                             subSequence = iParser.getSubSequence(sequenceName, exon.getStart() , exon.getEnd(), 1) | 
|  | 57                             GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence) | 
|  | 58                             print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent) | 
|  | 59             transcript.setTagValue("GCpercent", GCpercent) | 
|  | 60             transcript.setTagValue("NPercent", nPercent) | 
|  | 61             self.outputWriter.addTranscript(transcript) | 
|  | 62             progress.inc() | 
|  | 63         progress.done() | 
|  | 64 | 
|  | 65     def run(self): | 
|  | 66         self.readGffAnnotation() | 
|  | 67         if self.outputWriter != None: | 
|  | 68             self.write() | 
|  | 69 | 
|  | 70 if __name__ == "__main__": | 
|  | 71         description = "Count GC percent for each read against a genome." | 
|  | 72         usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]" | 
|  | 73         examples = "\nExample: \n" | 
|  | 74         examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3" | 
|  | 75         examples += "\n\n" | 
|  | 76         parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples) | 
|  | 77         parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None ) | 
|  | 78         parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None) | 
|  | 79         parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None ) | 
|  | 80         parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 ) | 
|  | 81         (options, args) = parser.parse_args() | 
|  | 82 | 
|  | 83         readGCPercent = CountReadGCPercent() | 
|  | 84         readGCPercent.setInputReferenceFile(options.fastaFile) | 
|  | 85         readGCPercent.setInputGffFile(options.gffFile) | 
|  | 86         readGCPercent.setOutputFileName(options.outputFile) | 
|  | 87         readGCPercent.run() | 
|  | 88 |