Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/CountReadGCPercent.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from optparse import OptionParser | |
4 from commons.core.parsing.FastaParser import FastaParser | |
5 from commons.core.writer.Gff3Writer import Gff3Writer | |
6 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer | |
7 from SMART.Java.Python.misc.Progress import Progress | |
8 from commons.core.utils.RepetOptionParser import RepetOptionParser | |
9 from Gnome_tools.CountGCPercentBySlidingWindow import CountGCPercentBySlidingWindow | |
10 | |
11 | |
12 class CountReadGCPercent(object): | |
13 | |
14 def __init__(self): | |
15 self.referenceReader = None | |
16 self.gffReader = None | |
17 self.outputWriter = None | |
18 self.verbose = 0 | |
19 | |
20 def setInputReferenceFile(self, fileName): | |
21 self.referenceReader = fileName | |
22 | |
23 def setInputGffFile(self, fileName): | |
24 self.gffReader = TranscriptContainer(fileName, 'gff3', self.verbose) | |
25 | |
26 def setOutputFileName(self, fileName): | |
27 self.outputWriter = Gff3Writer(fileName, self.verbose) | |
28 | |
29 def readGffAnnotation(self): | |
30 self.coveredRegions = {} | |
31 progress = Progress(self.gffReader.getNbTranscripts(), "Reading gff3 annotation file", self.verbose) | |
32 for transcript in self.gffReader.getIterator(): | |
33 chromosome = transcript.getChromosome() | |
34 if chromosome not in self.coveredRegions: | |
35 self.coveredRegions[chromosome] = {} | |
36 for exon in transcript.getExons(): | |
37 for position in range(exon.getStart(), exon.getEnd()+1): | |
38 self.coveredRegions[chromosome][position] = 1 | |
39 progress.inc() | |
40 progress.done() | |
41 | |
42 def write(self): | |
43 iParser = FastaParser(self.referenceReader) | |
44 iParser.setTags() | |
45 iGetGCPercentBySW = CountGCPercentBySlidingWindow() | |
46 progress = Progress(self.gffReader.getNbTranscripts(), "Writing output file", self.verbose) | |
47 for transcript in self.gffReader.getIterator(): | |
48 chromosome = transcript.getChromosome() | |
49 GCpercent = 0 | |
50 nPercent = 0 | |
51 for exon in transcript.getExons(): | |
52 for sequenceName in iParser.getTags().keys(): | |
53 if sequenceName != chromosome: | |
54 continue | |
55 else: | |
56 subSequence = iParser.getSubSequence(sequenceName, exon.getStart() , exon.getEnd(), 1) | |
57 GCpercent, nPercent = iGetGCPercentBySW.getGCPercentAccordingToNAndNPercent(subSequence) | |
58 print "GCpercent = %f, nPercent = %f" % (GCpercent, nPercent) | |
59 transcript.setTagValue("GCpercent", GCpercent) | |
60 transcript.setTagValue("NPercent", nPercent) | |
61 self.outputWriter.addTranscript(transcript) | |
62 progress.inc() | |
63 progress.done() | |
64 | |
65 def run(self): | |
66 self.readGffAnnotation() | |
67 if self.outputWriter != None: | |
68 self.write() | |
69 | |
70 if __name__ == "__main__": | |
71 description = "Count GC percent for each read against a genome." | |
72 usage = "CountReadGCPercent.py -i <fasta file> -j <gff3 file> -o <output gff3 file> -v <verbose> -h]" | |
73 examples = "\nExample: \n" | |
74 examples += "\t$ python CountReadGCPercent.py -i file.fasta -j annotation.gff -o output.gff3" | |
75 examples += "\n\n" | |
76 parser = RepetOptionParser(description = description, usage = usage, version = "v1.0", epilog = examples) | |
77 parser.add_option( '-i', '--inputGenome', dest='fastaFile', help='fasta file [compulsory]', default= None ) | |
78 parser.add_option( '-j', '--inputAnnotation', dest='gffFile', help='gff3 file [compulsory]', default= None) | |
79 parser.add_option( '-o', '--output', dest='outputFile', help='output gff3 file [compulsory]', default= None ) | |
80 parser.add_option( '-v', '--verbose', dest='verbose', help='verbosity level (default=0/1)',type="int", default= 0 ) | |
81 (options, args) = parser.parse_args() | |
82 | |
83 readGCPercent = CountReadGCPercent() | |
84 readGCPercent.setInputReferenceFile(options.fastaFile) | |
85 readGCPercent.setInputGffFile(options.gffFile) | |
86 readGCPercent.setOutputFileName(options.outputFile) | |
87 readGCPercent.run() | |
88 |