# HG changeset patch # User m-zytnicki # Date 1358517707 18000 # Node ID 4dded8b1fbc4a306e6c0592a77bf67b15cbb25f5 # Parent 86c78142123980a505eb2951eb466f427c6fd67a Deleted selected files diff -r 86c781421239 -r 4dded8b1fbc4 SMART/Java/Python/toolLauncher/RnaFoldLauncher.py --- a/SMART/Java/Python/toolLauncher/RnaFoldLauncher.py Fri Jan 18 09:01:23 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,379 +0,0 @@ -# -# Copyright INRA-URGI 2009-2010 -# -# This software is governed by the CeCILL license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL license and that you accept its terms. -# -import os -import sys -import random -import subprocess -from SMART.Java.Python.structure.TranscriptList import TranscriptList -from SMART.Java.Python.structure.Transcript import Transcript -from SMART.Java.Python.misc.Progress import Progress -from commons.core.parsing.FastaParser import FastaParser - - -class RnaFoldStructure(object): - """ - A structure to store the output of RNAFold - @ivar name: the name of the sequence - @type name: string - @ivar sequence: the sequence (with gaps) - @type sequence: string - @ivar structure: the bracket structure - @type structure: string - @ivar energy: the energy of the fold - @type energy: float - @ivar interactions: the interactions inside the structure - @ivar interactions: the interactions inside the structure - """ - - def __init__(self, name, sequence, structure, energy): - """ - Initialize the structure - @param name the name of the sequence - @type name: string - @param sequence: the sequence (with gaps) - @type sequence: string - @param structure: the bracket structure - @type structure: string - @param energy: the energy of the fold - @type energy: float - """ - self.name = name - self.sequence = sequence - self.structure = structure - self.energy = energy - self.interactions = None - - - def analyze(self): - """ - Analyze the output, assign the interactions - """ - if len(self.sequence) != len(self.structure): - sys.exit("Sizes of sequence and structure differ ('%s' and '%s')!\n" % (self.sequence, self.structure)) - stack = [] - self.interactions = [None for i in range(len(self.sequence))] - for i in range(len(self.sequence)): - if self.structure[i] == "(": - stack.append(i) - elif self.structure[i] == ")": - if not stack: - sys.exit("Something wrong in the interaction line '%s'!\n" % (self.structure)) - otherI = stack.pop() - self.interactions[i] = otherI - self.interactions[otherI] = i - if stack: - sys.exit("Something wrong in the interaction line '%s'!\n" % (self.structure)) - - - def getNbBulges(self, start = None, end = None): - """ - Get the number of insertions in a given range (in number of letters) - @param start: where to start the count - @type start: int - @param end: where to end the co - @type end: int - """ - if start == None: - start = 0 - if end == None: - end = len(self.sequence) - previousInt = None - nbBulges = 0 - inRegion = False - for i in range(len(self.sequence)): - if i == start: - inRegion = True - if i > end: - return nbBulges - if inRegion: - if self.interactions[i] == None: - nbBulges += 1 - elif previousInt != None and abs(self.interactions[i] - previousInt) != 1: - nbBulges += 1 - previousInt = self.interactions[i] - return nbBulges - - - def getStar(self, start = None, end = None): - """ - Get the supposed miRNA* - @param start: where to start the count - @type start: int - @param end: where to end the co - @type end: int - """ - if start == None: - start = 0 - if end == None: - end = len(self.sequence) - minStar = 1000000 - maxStar = 0 - inRegion = False - for i in range(len(self.sequence)): - if i == start: - inRegion = True - if i > end: - return (minStar, maxStar) - if inRegion: - if self.interactions[i] != None: - minStar = min(minStar, self.interactions[i]) - maxStar = max(maxStar, self.interactions[i]) - return (minStar, maxStar) - - - -class RnaFoldLauncher(object): - """ - Start and parse a RNAFold instance - @ivar inputTranscriptList: a list of transcripts - @type inputTranscriptList: class L{TranscriptList} - @ivar genomeFileParser: a parser to the genome file - @type genomeFileParser: class L{SequenceListParser} - @ivar bothStrands: whether folding is done on both strands - @type bothStrands: bool - @ivar fivePrimeExtension: extension towards the 5' end - @type fivePrimeExtension: int - @ivar threePrimeExtension: extension towards the 3' end - @type threePrimeExtension: int - @ivar inputTranscriptList: the input list of transcripts - @type inputTranscriptList: class L{TranscriptList} - @ivar outputTranscriptList: the output list of transcripts - @type outputTranscriptList: class L{TranscriptList} - @ivar tmpInputFileName: the name of the temporary input file for RNAFold - @type tmpInputFileName: string - @ivar tmpOutputFileName: the name of the temporary output file for RNAFold - @type tmpOutputFileName: string - @ivar verbosity: verbosity - @type verbosity: int [default: 0] - """ - - def __init__(self, verbosity = 0): - """ - Constructor - @param verbosity: verbosity - @type verbosity: int - """ - self.verbosity = verbosity - self.transcriptNames = [] - randomNumber = random.randint(0, 100000) - self.bothStrands = True - self.tmpInputFileName = "tmpInput_%d.fas" % (randomNumber) - self.tmpOutputFileName = "tmpOutput_%d.fas" % (randomNumber) - self.outputTranscriptList = None - self.fivePrimeExtension = 0 - self.threePrimeExtension = 0 - - - def __del__(self): - for file in (self.tmpInputFileName, self.tmpOutputFileName): - if os.path.isfile(file): - os.remove(file) - - - def setTranscriptList(self, inputTranscriptList): - """ - Set the list of transcripts - @ivar inputTranscriptList: a list of transcripts - @type inputTranscriptList: class L{TranscriptList} - """ - self.inputTranscriptList = inputTranscriptList - - - def setExtensions(self, fivePrime, threePrime): - """ - Set extension sizes - @ivar fivePrime: extension towards the 5' end - @type fivePrime: int - @ivar threePrime: extension towards the 3' end - @type threePrime: int - """ - self.fivePrimeExtension = fivePrime - self.threePrimeExtension = threePrime - - - def setNbStrands(self, nbStrands): - """ - Set number of strands - @ivar nbStrands: if 2, the work is done on both strands - @type nbStrands: int - """ - self.nbStrands = nbStrands - - - def setGenomeFile(self, fileName): - """ - Set the genome file - @ivar genomeFileName: the multi-FASTA file containing the genome - @type genomeFileName: a string - """ - self.genomeFileParser = FastaParser(fileName, self.verbosity) - - - def writeInputFile(self, transcript, reverse, fivePrimeExtension, threePrimeExtension): - """ - Write the RNAFold input file, containing the sequence corresponding to the transcript - @ivar transcript: a transcript - @type transcript: class L{Transcript} - @ivar reverse: invert the extensions - @type reverse: bool - """ - transcriptCopy = Transcript(transcript) - transcriptCopy.removeExons() - if not reverse: - transcriptCopy.extendStart(fivePrimeExtension) - transcriptCopy.extendEnd(threePrimeExtension) - else: - transcriptCopy.extendStart(threePrimeExtension) - transcriptCopy.extendEnd(fivePrimeExtension) - sequence = transcriptCopy.extractSequence(self.genomeFileParser) - handle = open(self.tmpInputFileName, "w") - handle.write(">%s\n%s\n" % (sequence.getName().replace(":", "_").replace(".", "_"), sequence.getSequence())) - handle.close() - - - def startRnaFold(self): - """ - Start RNAFold - """ - command = "RNAfold < %s > %s" % (self.tmpInputFileName, self.tmpOutputFileName) - if self.verbosity > 100: - print "Launching command '%s'" % (command) - status = subprocess.call(command, shell=True) - if status != 0: - raise Exception("Problem with RNAFold! Input file is %s, status is: %s" % (self.tmpInputFileName, status)) - - - def parseRnaFoldOutput(self): - """ - Parse an output file of RNAFold - @return: an RnaFoldStructure - """ - lines = open(self.tmpOutputFileName).readlines() - if len(lines) != 3: - raise Exception("Problem in RNAfold output! '%s'" % (lines)) - name = lines[0].strip()[1:].split()[0] - sequence = lines[1].strip() - structure = lines[2].strip().split()[0] - energy = float(lines[2].strip().split(" ", 1)[1][1:-1].strip()) - if self.verbosity > 100: - print "Getting sequence %s, structure %s" % (sequence, structure) - return RnaFoldStructure(name, sequence, structure, energy) - - - def analyzeRnaFoldOutput(self, transcript, rnaFoldOutput, reverse, fivePrimeExtension, threePrimeExtension): - """ - Analyze the RNAFold - @ivar transcript: a transcript - @type transcript: class L{Transcript} - @ivar rnaFoldOutput: the output of RNAFold - @type rnaFoldOutput: class L{RnaFoldStructure} - @ivar reverse: invert the extensions - @type reverse: bool - @return: a t-uple of energy, number of insertions, number of bulges, strand - """ - rnaFoldOutput.analyze() - transcriptSize = transcript.end - transcript.start + 1 - start = fivePrimeExtension if not reverse else threePrimeExtension - end = start + transcriptSize - energy = rnaFoldOutput.energy - nbBulges = rnaFoldOutput.getNbBulges(start, end) - (minStar, maxStar) = rnaFoldOutput.getStar(start, end) - minStar += transcript.start - start - maxStar += transcript.start - start - if self.verbosity > 100: - print "Getting structure with energy %d, nbBulges %d, miRna* %d-%d, strand %s" % (energy, nbBulges, minStar, maxStar, "-" if reverse else "+") - return (energy, nbBulges, minStar, maxStar, reverse) - - - def fold(self, transcript): - """ - Fold a transcript (in each strand) - @ivar transcript: a transcript - @type transcript: class L{Transcript} - @return: a t-uple of energy, number of insertions, number of bulges, strand - """ - results = [None] * self.nbStrands - strands = [False, True] if self.nbStrands == 2 else [False] - minNbBulges = 1000000 - for i, reverse in enumerate(strands): - self.writeInputFile(transcript, reverse, self.fivePrimeExtension, self.threePrimeExtension) - self.startRnaFold() - output = self.parseRnaFoldOutput() - results[i] = self.analyzeRnaFoldOutput(transcript, output, reverse, self.fivePrimeExtension, self.threePrimeExtension) - minNbBulges = min(minNbBulges, results[i][1]) - for result in results: - if result[1] == minNbBulges: - return result - return None - - - def refold(self, transcript): - """ - Fold a transcript, knowing where the miRNA starts and end - @ivar transcript: a transcript - @type transcript: class L{Transcript} - @return: the energy - """ - miStar = transcript.getTagValue("miRnaStar") - startMiStar = int(miStar.split("-")[0]) - endMiStart = int(miStar.split("-")[1]) - fivePrimeExtension = max(0, transcript.start - startMiStar) + 5 - threePrimeExtension = max(0, endMiStart - transcript.end) + 5 - self.writeInputFile(transcript, False, fivePrimeExtension, threePrimeExtension) - self.startRnaFold() - output = self.parseRnaFoldOutput() - result = self.analyzeRnaFoldOutput(transcript, output, False, fivePrimeExtension, threePrimeExtension) - return result[0] - - - def computeResults(self): - """ - Fold all and fill an output transcript list with the values - """ - progress = Progress(self.inputTranscriptList.getNbTranscripts(), "Handling transcripts", self.verbosity) - self.outputTranscriptList = TranscriptList() - for transcript in self.inputTranscriptList.getIterator(): - result = self.fold(transcript) - transcript.setTagValue("nbBulges", result[1]) - transcript.setTagValue("miRnaStar", "%d-%d" % (result[2], result[3])) - transcript.setTagValue("miRNAstrand", result[4]) - transcript.setTagValue("energy", self.refold(transcript)) - self.outputTranscriptList.addTranscript(transcript) - progress.inc() - progress.done() - - - def getResults(self): - """ - Get an output transcript list with the values - """ - if self.outputTranscriptList == None: - self.computeResults() - return self.outputTranscriptList diff -r 86c781421239 -r 4dded8b1fbc4 SMART/Java/Python/toolLauncher/__init__.py