| 
0
 | 
"""
rgWebLogo3.py
Wrapper that checks that all sequences in a fasta file are the same length
before passing the file to weblogo3.

Modified June 2 by Ross Lazarus to add the units option at Assaf Gordon's suggestion.
"""
 | 
| 
 | 
     7 import optparse, os, sys, subprocess, tempfile
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 class WL3:
 | 
| 
 | 
    12     """
 | 
| 
 | 
    13     simple wrapper class to check fasta sequence lengths are all identical
 | 
| 
 | 
    14     """
 | 
| 
 | 
    15     FASTASTARTSYM = '>'
 | 
| 
 | 
    16     badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
 | 
| 
 | 
    17 
 | 
| 
 | 
    18     def __init__(self,opts=None):
 | 
| 
 | 
    19         assert opts<>None,'WL3 class needs opts passed in - got None'
 | 
| 
 | 
    20         self.opts = opts
 | 
| 
 | 
    21         self.fastaf = file(self.opts.input,'r')
 | 
| 
 | 
    22         self.clparams = {}
 | 
| 
 | 
    23 
 | 
| 
 | 
    24     def whereis(self,program):
 | 
| 
 | 
    25         for path in os.environ.get('PATH', '').split(':'):
 | 
| 
 | 
    26             if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)):
 | 
| 
 | 
    27                 return os.path.join(path, program)
 | 
| 
 | 
    28         return None
 | 
| 
 | 
    29 
 | 
| 
 | 
    30     def runCL(self):
 | 
| 
 | 
    31         """ construct and run a command line
 | 
| 
 | 
    32         """
 | 
| 
 | 
    33         wl = self.whereis(WEBLOGO)
 | 
| 
 | 
    34         if not wl:
 | 
| 
 | 
    35              print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO
 | 
| 
 | 
    36              print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo'
 | 
| 
 | 
    37              sys.exit(1)
 | 
| 
 | 
    38         cll = [WEBLOGO,]
 | 
| 
 | 
    39         cll += [' '.join(it) for it in list(self.clparams.items())]
 | 
| 
 | 
    40         cl = ' '.join(cll)
 | 
| 
 | 
    41         assert cl > '', 'runCL needs a command line as clparms'
 | 
| 
 | 
    42         fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt')
 | 
| 
 | 
    43         tlf = open(templog,'w')
 | 
| 
 | 
    44         process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf)
 | 
| 
 | 
    45         rval = process.wait()
 | 
| 
 | 
    46         tlf.close()
 | 
| 
 | 
    47         tlogs = ''.join(open(templog,'r').readlines())
 | 
| 
 | 
    48         if len(tlogs) > 1:
 | 
| 
 | 
    49             s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs)
 | 
| 
 | 
    50         else:
 | 
| 
 | 
    51             s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval)
 | 
| 
 | 
    52         os.unlink(templog) # always
 | 
| 
 | 
    53         if rval <> 0:
 | 
| 
 | 
    54              print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval)
 | 
| 
 | 
    55              print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO
 | 
| 
 | 
    56              print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO
 | 
| 
 | 
    57              sys.exit(1)
 | 
| 
 | 
    58         return s
 | 
| 
 | 
    59 
 | 
| 
 | 
    60         
 | 
| 
 | 
    61     def iter_fasta(self):
 | 
| 
 | 
    62         """
 | 
| 
 | 
    63         generator for fasta sequences from a file
 | 
| 
 | 
    64         """
 | 
| 
 | 
    65         aseq = []
 | 
| 
 | 
    66         seqname = None
 | 
| 
 | 
    67         for i,row in enumerate(self.fastaf):
 | 
| 
 | 
    68             if row.startswith(self.FASTASTARTSYM):
 | 
| 
 | 
    69                 if seqname <> None: # already in a sequence
 | 
| 
 | 
    70                     s = ''.join(aseq)
 | 
| 
 | 
    71                     l = len(s)
 | 
| 
 | 
    72                     yield (seqname,l)
 | 
| 
 | 
    73                     seqname = row[1:].strip()
 | 
| 
 | 
    74                     aseq = []
 | 
| 
 | 
    75                 else:
 | 
| 
 | 
    76                     if i > 0:
 | 
| 
 | 
    77                         print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
 | 
| 
 | 
    78                         sys.exit(1)
 | 
| 
 | 
    79                     else:
 | 
| 
 | 
    80                         seqname = row[1:].strip() 
 | 
| 
 | 
    81             else: # sequence row
 | 
| 
 | 
    82                 if seqname == None:
 | 
| 
 | 
    83                     print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
 | 
| 
 | 
    84                     sys.exit(1) 
 | 
| 
 | 
    85                 else:
 | 
| 
 | 
    86                     aseq.append(row.strip())
 | 
| 
 | 
    87                 
 | 
| 
 | 
    88         if seqname <> None: # last one
 | 
| 
 | 
    89             l = len(''.join(aseq))
 | 
| 
 | 
    90             yield (seqname,l)
 | 
| 
 | 
    91                 
 | 
| 
 | 
    92         
 | 
| 
 | 
    93     def fcheck(self):
 | 
| 
 | 
    94         """ are all fasta sequence same length?
 | 
| 
 | 
    95         might be mongo big
 | 
| 
 | 
    96         """
 | 
| 
 | 
    97         flen = None
 | 
| 
 | 
    98         lasti = None
 | 
| 
 | 
    99         f = self.iter_fasta()
 | 
| 
 | 
   100         for i,(seqname,seqlen) in enumerate(f):
 | 
| 
 | 
   101             lasti = i
 | 
| 
 | 
   102             if i == 0:
 | 
| 
 | 
   103                 flen = seqlen
 | 
| 
 | 
   104             else:
 | 
| 
 | 
   105                 if seqlen <> flen:
 | 
| 
 | 
   106                     print >> sys.stderr,self.badseq % self.opts.input
 | 
| 
 | 
   107                     sys.exit(1)
 | 
| 
 | 
   108         return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input,lasti,flen)
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 
 | 
| 
 | 
   111     def run(self):
 | 
| 
 | 
   112         check = self.fcheck()
 | 
| 
 | 
   113         self.clparams['-f'] = self.opts.input
 | 
| 
 | 
   114         self.clparams['-o'] = self.opts.output
 | 
| 
 | 
   115         self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string       
 | 
| 
 | 
   116         self.clparams['-F'] = self.opts.outformat       
 | 
| 
 | 
   117         if self.opts.size <> None:
 | 
| 
 | 
   118             self.clparams['-s'] = self.opts.size
 | 
| 
 | 
   119         if self.opts.lower <> None:
 | 
| 
 | 
   120             self.clparams['-l'] = self.opts.lower
 | 
| 
 | 
   121         if self.opts.upper <> None:
 | 
| 
 | 
   122             self.clparams['-u'] = self.opts.upper        
 | 
| 
 | 
   123         if self.opts.colours <> None:
 | 
| 
 | 
   124             self.clparams['-c'] = self.opts.colours
 | 
| 
 | 
   125         if self.opts.units <> None:
 | 
| 
 | 
   126             self.clparams['-U'] = self.opts.units
 | 
| 
 | 
   127         s = self.runCL()
 | 
| 
 | 
   128         return check,s
 | 
| 
 | 
   129 
 | 
| 
 | 
   130 
 | 
| 
 | 
   131 if __name__ == '__main__':
 | 
| 
 | 
   132     '''
 | 
| 
 | 
   133     called as
 | 
| 
 | 
   134 <command interpreter="python"> 
 | 
| 
 | 
   135     rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
 | 
| 
 | 
   136 #if $range.mode == 'part'
 | 
| 
 | 
   137 -l "$range.seqstart" -u "$range.seqend"
 | 
| 
 | 
   138 #end if
 | 
| 
 | 
   139     </command>
 | 
| 
 | 
   140 
 | 
| 
 | 
   141     '''
 | 
| 
 | 
   142     op = optparse.OptionParser()
 | 
| 
 | 
   143     op.add_option('-i', '--input', default=None)
 | 
| 
 | 
   144     op.add_option('-F', '--outformat', default='png')
 | 
| 
 | 
   145     op.add_option('-s', '--size', default=None) 
 | 
| 
 | 
   146     op.add_option('-o', '--output', default='rgWebLogo3')
 | 
| 
 | 
   147     op.add_option('-t', '--logoname', default='rgWebLogo3')
 | 
| 
 | 
   148     op.add_option('-c', '--colours', default=None)
 | 
| 
 | 
   149     op.add_option('-l', '--lower', default=None)
 | 
| 
 | 
   150     op.add_option('-u', '--upper', default=None)  
 | 
| 
 | 
   151     op.add_option('-U', '--units', default=None)  
 | 
| 
 | 
   152     opts, args = op.parse_args()
 | 
| 
 | 
   153     assert opts.input <> None,'weblogo3 needs a -i parameter with a fasta input file - cannot open'
 | 
| 
 | 
   154     assert os.path.isfile(opts.input),'weblogo3 needs a valid fasta input file - cannot open %s' % opts.input
 | 
| 
 | 
   155     w = WL3(opts)
 | 
| 
 | 
   156     checks,s = w.run()
 | 
| 
 | 
   157     print >> sys.stdout, checks # for info
 |