Mercurial > repos > yufei-luo > s_mart
diff SMART/Java/Python/mergeTranscriptLists.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SMART/Java/Python/mergeTranscriptLists.py Fri Jan 18 04:54:14 2013 -0500 @@ -0,0 +1,174 @@ +#! /usr/bin/env python +# +# Copyright INRA-URGI 2009-2010 +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
#
"""Merge elements of two transcript lists with some condition"""

import glob
import os
import random
import shutil
from optparse import OptionParser
from commons.core.parsing.SequenceListParser import SequenceListParser
from commons.core.parsing.BedParser import BedParser
from commons.core.parsing.GffParser import GffParser
from commons.core.writer.TranscriptWriter import TranscriptWriter
from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
from SMART.Java.Python.misc.RPlotter import RPlotter
from SMART.Java.Python.misc.Progress import Progress


class MergeLists(object):
    """Merge the elements of two transcript files into one GFF3 file.

    The work is delegated to TranscriptListsComparator; this class only
    stores the user settings and chains the comparator calls through
    temporary GFF3 files (see run()).
    """

    def __init__(self, verbosity):
        """Store the verbosity and set every option to its default.

        verbosity -- trace level forwarded to the comparator, containers
                     and writers
        """
        self.verbosity = verbosity
        # Random suffix used in the names of the temporary files.
        self.seed = random.randint(0, 100000)
        self.aggregation = False
        self.normalization = False
        self.distance = False
        self.antisense = False
        self.colinear = False
        # Input file names and formats, keyed by input id (0 or 1).
        self.fileNames = {}
        self.formats = {}
        # Names of the temporary files created by createTmpFileName().
        self.tmpFileNames = []
        self.logHandle = None
        # Initialised here so that the attribute always exists.
        self.outputFileName = None

    def setLogFileName(self, fileName):
        """Open fileName for writing and use it as the log handle."""
        # Do not leak a handle opened by a previous call.
        if self.logHandle is not None:
            self.logHandle.close()
        self.logHandle = open(fileName, "w")

    def setInputFileName(self, fileName, format, id):
        """Register an input file and its format under the given id (0 or 1)."""
        self.fileNames[id] = fileName
        self.formats[id] = format

    def setOutputFileName(self, fileName):
        """Set the name of the final output file."""
        self.outputFileName = fileName

    def setAggregate(self, aggregation):
        """Set the aggregation flag (command line option -k/--all)."""
        self.aggregation = aggregation

    def setNormalization(self, normalization):
        """Set the normalization flag forwarded to the comparator."""
        self.normalization = normalization

    def setDistance(self, distance):
        """Set the maximum distance forwarded to the comparator."""
        self.distance = distance

    def setAntisense(self, antisense):
        """Set the antisense-only flag forwarded to the comparator."""
        self.antisense = antisense

    def setColinear(self, colinear):
        """Set the colinear-only flag forwarded to the comparator."""
        self.colinear = colinear

    def createTmpFileName(self, root):
        """Return a temporary GFF3 file name built from root and the seed.

        The name is recorded so that run() can delete the file afterwards.
        """
        fileName = "tmp_%s_%d.gff3" % (root, self.seed)
        self.tmpFileNames.append(fileName)
        return fileName

    def _removeTmpFiles(self):
        """Delete every temporary file recorded by createTmpFileName()."""
        for fileNameRoot in self.tmpFileNames:
            for fileName in glob.glob("%s*" % (fileNameRoot)):
                try:
                    os.remove(fileName)
                except OSError:
                    # Best-effort cleanup: a file that is already gone or
                    # cannot be removed must not mask the real result.
                    pass
        self.tmpFileNames = []

    def selfMerge(self, fileName, format, outputFileName):
        """Run the comparator's self-merge on one file, writing GFF3 output.

        fileName       -- input transcript file
        format         -- format of the input file
        outputFileName -- GFF3 file to write
        """
        transcriptListComparator = TranscriptListsComparator(self.logHandle, self.verbosity)
        transcriptListComparator.getColinearOnly(True)
        transcriptListComparator.setNormalization(self.normalization)
        transcriptContainer = TranscriptContainer(fileName, format, self.verbosity)
        # NOTE(review): the writer is never closed explicitly; presumably it
        # flushes when this local goes out of scope, so keep it local.
        writer = TranscriptWriter(outputFileName, "gff3", self.verbosity)
        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer)
        transcriptListComparator.setOutputWriter(writer)
        transcriptListComparator.compareTranscriptListSelfMerge()

    def keepOverlapping(self, fileNames, formats, outputFileName):
        """Compare two transcript files with the comparator, writing GFF3.

        fileNames      -- mapping {0: file name, 1: file name}
        formats        -- mapping {0: format, 1: format}
        outputFileName -- GFF3 file to write

        The antisense, colinear, aggregation, normalization and distance
        settings of this object are forwarded to the comparator.
        """
        transcriptListComparator = TranscriptListsComparator(self.logHandle, self.verbosity)
        transcriptListComparator.getAntisenseOnly(self.antisense)
        transcriptListComparator.getColinearOnly(self.colinear)
        for i in (0, 1):
            transcriptContainer = TranscriptContainer(fileNames[i], formats[i], self.verbosity)
            transcriptListComparator.setInputTranscriptContainer(i, transcriptContainer)
        transcriptListComparator.aggregate(self.aggregation)
        transcriptListComparator.setNormalization(self.normalization)
        transcriptListComparator.setMaxDistance(self.distance)
        writer = TranscriptWriter(outputFileName, "gff3", self.verbosity)
        transcriptListComparator.setOutputWriter(writer)
        transcriptListComparator.compareTranscriptList()

    def mergeFiles(self, fileName1, fileName2, outputFileName):
        """Concatenate fileName1 then fileName2 into outputFileName."""
        # Context managers close all three files, even if a copy fails.
        with open(outputFileName, "w") as outputFile:
            for fileName in (fileName1, fileName2):
                with open(fileName, "r") as inputFile:
                    shutil.copyfileobj(inputFile, outputFile)

    def run(self):
        """Execute the whole merge and write the result to the output file.

        Steps:
          1. input 0 is compared with itself -> "query" temporary file;
          2. input 1 is self-merged          -> "target" temporary file;
          3. unless aggregation is set, target is compared with query and
             the result replaces target;
          4. target and query are concatenated -> "merged" temporary file;
          5. the merged file is self-merged into the output file.

        The temporary files are removed afterwards, whether or not the
        run succeeded.
        """
        try:
            selectedFileQuery = self.createTmpFileName("query")
            # Use the format declared for input 0 instead of assuming GFF3;
            # fall back to "gff3" when no format was given.
            queryFormat = self.formats[0] or "gff3"
            self.keepOverlapping({0: self.fileNames[0], 1: self.fileNames[0]}, {0: queryFormat, 1: queryFormat}, selectedFileQuery)
            mergeFileTarget = self.createTmpFileName("target")
            self.selfMerge(self.fileNames[1], self.formats[1], mergeFileTarget)
            if not self.aggregation:
                overlapFile = self.createTmpFileName("overlap")
                self.keepOverlapping({0: mergeFileTarget, 1: selectedFileQuery}, {0: "gff3", 1: "gff3"}, overlapFile)
                mergeFileTarget = overlapFile
            mergeFileMerged = self.createTmpFileName("merged")
            self.mergeFiles(mergeFileTarget, selectedFileQuery, mergeFileMerged)
            self.selfMerge(mergeFileMerged, "gff3", self.outputFileName)
        finally:
            self._removeTmpFiles()


def main():
    """Parse the command line, configure a MergeLists object and run it."""
    description = "Merge Lists v1.0.3: Merge the elements of two lists of genomic coordinates. [Category: Merge]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string",
                      help="input file 1 [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string",
                      help="format of file 1 [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", default=None, type="string",
                      help="input file 2 [compulsory] [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", default=None, type="string",
                      help="format of file 2 [compulsory] [format: file in transcript format]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", default=None, type="string",
                      help="output file [compulsory] [format: output file in GFF3 format]")
    parser.add_option("-k", "--all", dest="all", action="store_true", default=False,
                      help="print all the transcripts, not only those overlapping [format: bool] [default: false]")
    parser.add_option("-d", "--distance", dest="distance", action="store", default=0, type="int",
                      help="max. distance between two transcripts [format: int] [default: 0]")
    parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False,
                      help="antisense only [format: bool] [default: false]")
    parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False,
                      help="colinear only [format: bool] [default: false]")
    parser.add_option("-n", "--normalize", dest="normalize", action="store_true", default=False,
                      help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int",
                      help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Fail with a usage message rather than a traceback when a file name
    # documented as compulsory is missing.
    compulsory = (
        ("-i/--input1", options.inputFileName1),
        ("-j/--input2", options.inputFileName2),
        ("-o/--output", options.outputFileName),
    )
    for flag, value in compulsory:
        if value is None:
            parser.error("option %s is compulsory" % flag)

    # NOTE(review): -v is parsed but verbosity 0 is hard-coded, as in the
    # original script; confirm whether options.verbosity should be used.
    ml = MergeLists(0)
    ml.setInputFileName(options.inputFileName1, options.format1, 0)
    ml.setInputFileName(options.inputFileName2, options.format2, 1)
    ml.setOutputFileName(options.outputFileName)
    ml.setAntisense(options.antisense)
    ml.setColinear(options.colinear)
    ml.setAggregate(options.all)
    ml.setNormalization(options.normalize)
    ml.setDistance(options.distance)
    ml.run()


if __name__ == "__main__":
    main()