# HG changeset patch # User m-zytnicki # Date 1447693175 18000 # Node ID f4de72c80eacf86fc5051ef36149a62e8686fa02 # Parent 4ab9c86890babef1a524b45334c49f03e1e0ceb8 Uploaded diff -r 4ab9c86890ba -r f4de72c80eac SMART/Java/Python/clusterize.py --- a/SMART/Java/Python/clusterize.py Wed Nov 04 03:41:26 2015 -0500 +++ b/SMART/Java/Python/clusterize.py Mon Nov 16 11:59:35 2015 -0500 @@ -101,21 +101,20 @@ fs.setOutputFileName(self.sortedFileNames[fileName]) fs.sort() self.splittedFileNames[fileName] = fs.getOutputFileNames() - self.nbElementsPerChromosome = fs.getNbElementsPerChromosome() - self.nbElements = fs.getNbElements() self.chromosomes.update(self.splittedFileNames[fileName].keys()) def _iterate(self): progress = UnlimitedProgress(10000, "Reading input file", self.verbosity) - transcripts = [] - heap = [] parsersSets = [] + self.nbElements = 0 if self.chromosomes: + for chromosome in self.chromosomes: + parsersSets.append([NCListFileUnpickle(self.splittedFileNames[fileName][chromosome]) for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]]) + else: parsersSets.append(self.parsers.values()) - else: - for chromosome in self.chromosomes: - parsersSets.append([self.splittedFileNames[fileName][chromosome] for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]]) for parsers in parsersSets: + transcripts = [] + heap = [] for parser in parsers: iterator = parser.getIterator() for transcript in iterator: @@ -142,6 +141,7 @@ newTranscripts.append(oldTranscript) newTranscripts.append(newTranscript) transcripts = newTranscripts + self.nbElements += 1 progress.inc() for transcript in transcripts: self._write(transcript)