| 
6
 | 
     1 #
 | 
| 
 | 
     2 # Copyright INRA-URGI 2009-2010
 | 
| 
 | 
     3 # 
 | 
| 
 | 
     4 # This software is governed by the CeCILL license under French law and
 | 
| 
 | 
     5 # abiding by the rules of distribution of free software. You can use,
 | 
| 
 | 
     6 # modify and/ or redistribute the software under the terms of the CeCILL
 | 
| 
 | 
     7 # license as circulated by CEA, CNRS and INRIA at the following URL
 | 
| 
 | 
     8 # "http://www.cecill.info".
 | 
| 
 | 
     9 # 
 | 
| 
 | 
    10 # As a counterpart to the access to the source code and rights to copy,
 | 
| 
 | 
    11 # modify and redistribute granted by the license, users are provided only
 | 
| 
 | 
    12 # with a limited warranty and the software's author, the holder of the
 | 
| 
 | 
    13 # economic rights, and the successive licensors have only limited
 | 
| 
 | 
    14 # liability.
 | 
| 
 | 
    15 # 
 | 
| 
 | 
    16 # In this respect, the user's attention is drawn to the risks associated
 | 
| 
 | 
    17 # with loading, using, modifying and/or developing or reproducing the
 | 
| 
 | 
    18 # software by the user in light of its specific status of free software,
 | 
| 
 | 
    19 # that may mean that it is complicated to manipulate, and that also
 | 
| 
 | 
    20 # therefore means that it is reserved for developers and experienced
 | 
| 
 | 
    21 # professionals having in-depth computer knowledge. Users are therefore
 | 
| 
 | 
    22 # encouraged to load and test the software's suitability as regards their
 | 
| 
 | 
    23 # requirements in conditions enabling the security of their systems and/or
 | 
| 
 | 
    24 # data to be ensured and, more generally, to use and operate it in the
 | 
| 
 | 
    25 # same conditions as regards security.
 | 
| 
 | 
    26 # 
 | 
| 
 | 
    27 # The fact that you are presently reading this means that you have had
 | 
| 
 | 
    28 # knowledge of the CeCILL license and that you accept its terms.
 | 
| 
 | 
    29 #
 | 
| 
 | 
    30 import sys
 | 
| 
 | 
    31 import re
 | 
| 
 | 
    32 from commons.core.seq.Bioseq import Bioseq
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 reverseComplementString = {
 | 
| 
 | 
    35     "A": "T",
 | 
| 
 | 
    36     "C": "G",
 | 
| 
 | 
    37     "G": "C",
 | 
| 
 | 
    38     "T": "A",
 | 
| 
 | 
    39     "U": "A",
 | 
| 
 | 
    40     "M": "K",
 | 
| 
 | 
    41     "R": "Y",
 | 
| 
 | 
    42     "W": "W",
 | 
| 
 | 
    43     "S": "S",
 | 
| 
 | 
    44     "Y": "R",
 | 
| 
 | 
    45     "K": "M",
 | 
| 
 | 
    46     "V": "B",
 | 
| 
 | 
    47     "H": "D",
 | 
| 
 | 
    48     "D": "H",
 | 
| 
 | 
    49     "B": "V",
 | 
| 
 | 
    50     "N": "N",
 | 
| 
 | 
    51     "a": "t",
 | 
| 
 | 
    52     "c": "g",
 | 
| 
 | 
    53     "g": "c",
 | 
| 
 | 
    54     "t": "a",
 | 
| 
 | 
    55     "u": "a",
 | 
| 
 | 
    56     "m": "k",
 | 
| 
 | 
    57     "r": "y",
 | 
| 
 | 
    58     "w": "w",
 | 
| 
 | 
    59     "s": "s",
 | 
| 
 | 
    60     "y": "r",
 | 
| 
 | 
    61     "k": "m",
 | 
| 
 | 
    62     "v": "b",
 | 
| 
 | 
    63     "h": "d",
 | 
| 
 | 
    64     "d": "h",
 | 
| 
 | 
    65     "b": "v",
 | 
| 
 | 
    66     "n": "n"
 | 
| 
 | 
    67 }
 | 
| 
 | 
    68 
 | 
| 
 | 
    69 class Sequence(Bioseq):
 | 
| 
 | 
    70     """A class that codes for a sequence"""
 | 
| 
 | 
    71 
 | 
| 
 | 
    72     def __init__(self, name = "", sequence = ""):
 | 
| 
 | 
    73         super(Sequence, self).__init__(name, sequence)
 | 
| 
 | 
    74         self.name            = self.header        
 | 
| 
 | 
    75         self.quality         = None
 | 
| 
 | 
    76         self.chunkedSequence = None
 | 
| 
 | 
    77         self.chunkedQuality  = None
 | 
| 
 | 
    78         self.integerQuality  = False
 | 
| 
 | 
    79 
 | 
| 
 | 
    80     def setName(self, name=""):
 | 
| 
 | 
    81         super(Sequence, self).setHeader(name)
 | 
| 
 | 
    82             
 | 
| 
 | 
    83     def getName(self):
 | 
| 
 | 
    84         return self.getHeader()
 | 
| 
 | 
    85     
 | 
| 
 | 
    86     def setSequence(self, seq=""):
 | 
| 
 | 
    87         super(Sequence, self).setSequence(seq)
 | 
| 
 | 
    88 
 | 
| 
 | 
    89     def setQuality(self, quality):
 | 
| 
 | 
    90         if quality == None:
 | 
| 
 | 
    91             self.quality = None
 | 
| 
 | 
    92             return
 | 
| 
 | 
    93         if " " in quality:
 | 
| 
 | 
    94             self.quality        = quality.split()
 | 
| 
 | 
    95             self.integerQuality = True
 | 
| 
 | 
    96         else:
 | 
| 
 | 
    97             self.quality = list(quality)
 | 
| 
 | 
    98         
 | 
| 
 | 
    99     def getQuality(self):
 | 
| 
 | 
   100         if self.quality == None:
 | 
| 
 | 
   101             return None
 | 
| 
 | 
   102         if self.integerQuality:
 | 
| 
 | 
   103             return " ".join(self.quality)
 | 
| 
 | 
   104         return "".join(self.quality)
 | 
| 
 | 
   105     
 | 
| 
 | 
   106     def getSize(self):
 | 
| 
 | 
   107         return len(self.getSequence())
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 
 | 
| 
 | 
   110     def copy(self, sequence):
 | 
| 
 | 
   111         self.setName(sequence.getName())
 | 
| 
 | 
   112         self.setSequence(sequence.getSequence())
 | 
| 
 | 
   113         self.setQuality(sequence.getQuality())
 | 
| 
 | 
   114         self.chunkedSequence = None
 | 
| 
 | 
   115         self.chunkedQuality  = None
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 
 | 
| 
 | 
   118     def chunkSequence(self):
 | 
| 
 | 
   119         self.chunkedSequence = []
 | 
| 
 | 
   120         for i in range (0, self.getSize() / 60 + 1):
 | 
| 
 | 
   121             self.chunkedSequence.append(self.getSequence()[i * 60 : min(self.getSize(), (i+1) * 60)])
 | 
| 
 | 
   122         if self.quality != None:
 | 
| 
 | 
   123             self.chunkedQuality = []
 | 
| 
 | 
   124             for i in range (0, self.getSize() / 60 + 1):
 | 
| 
 | 
   125                 self.chunkedQuality.append(self.quality[i * 60 : min(self.getSize(), (i+1) * 60)])
 | 
| 
 | 
   126 
 | 
| 
 | 
   127     def concatenate(self, seq):
 | 
| 
 | 
   128         sequence  = self.getSequence()
 | 
| 
 | 
   129         sequence += seq.getSequence()
 | 
| 
 | 
   130         self.setSequence(sequence)
 | 
| 
 | 
   131         if self.quality != None:
 | 
| 
 | 
   132             sep = " " if self.integerQuality else ""
 | 
| 
 | 
   133             self.setQuality(self.getQuality() + sep + seq.getQuality())
 | 
| 
 | 
   134         self.chunkedSequence = None
 | 
| 
 | 
   135         self.chunkedQuality  = None
 | 
| 
 | 
   136         
 | 
| 
 | 
   137 
 | 
| 
 | 
   138     def printFasta(self):
 | 
| 
 | 
   139         if self.chunkedSequence == None:
 | 
| 
 | 
   140             self.chunkSequence()
 | 
| 
 | 
   141         return ">%s\n%s\n" % (self.getHeader(), "\n".join(self.chunkedSequence))
 | 
| 
 | 
   142 
 | 
| 
 | 
   143 
 | 
| 
 | 
   144     def printFastq(self):
 | 
| 
 | 
   145         if self.chunkedSequence == None:
 | 
| 
 | 
   146             self.chunkSequence()
 | 
| 
 | 
   147         return "@%s\n%s\n+%s\n%s\n" % (self.getHeader(), self.getSequence(), self.getHeader(), self.getQuality())
 | 
| 
 | 
   148 
 | 
| 
 | 
   149 
 | 
| 
 | 
   150     def reverseComplement(self):
 | 
| 
 | 
   151         seq = ""
 | 
| 
 | 
   152         self.chunkedSequence = None
 | 
| 
 | 
   153         self.chunkedQuality  = None
 | 
| 
 | 
   154         for i in range(0, self.getSize()):
 | 
| 
 | 
   155             char = self.getSequence()[i:i+1]
 | 
| 
 | 
   156             if char not in reverseComplementString:
 | 
| 
 | 
   157                 sys.exit("Cannot understand character %s from string %s" % (char, self.getSequence()))
 | 
| 
 | 
   158             seq = "%s%s" % (reverseComplementString[char], seq)
 | 
| 
 | 
   159         self.setSequence(seq)
 | 
| 
 | 
   160         if self.quality != None:
 | 
| 
 | 
   161             self.quality = self.quality[::-1]
 | 
| 
 | 
   162         
 | 
| 
 | 
   163         
 | 
| 
 | 
   164     def containsAmbiguousNucleotides(self):
 | 
| 
 | 
   165         m = re.search("[^ACGTUacgtu]", self.getSequence())
 | 
| 
 | 
   166         if m != None:
 | 
| 
 | 
   167             return True
 | 
| 
 | 
   168         return False
 | 
| 
 | 
   169     
 | 
| 
 | 
   170     
 | 
| 
 | 
   171     def shrinkToFirstNucleotides(self, nbNucleotides):
 | 
| 
 | 
   172         self.chunkedSequence = None
 | 
| 
 | 
   173         self.chunkedQuality  = None
 | 
| 
 | 
   174         self.setSequence(self.getSequence()[0:nbNucleotides])
 | 
| 
 | 
   175         if self.quality != None:
 | 
| 
 | 
   176             self.quality = self.quality[0:nbNucleotides]
 | 
| 
 | 
   177     
 | 
| 
 | 
   178     
 | 
| 
 | 
   179     def shrinkToLastNucleotides(self, nbNucleotides):
 | 
| 
 | 
   180         self.chunkedSequence = None
 | 
| 
 | 
   181         self.chunkedQuality  = None
 | 
| 
 | 
   182         self.setSequence(self.getSequence()[-nbNucleotides:])
 | 
| 
 | 
   183         if self.quality != None:
 | 
| 
 | 
   184             self.quality = self.quality[-nbNucleotides:]
 |