changeset 32:ec8df51e841a

Fixes courtesy of Peter Briggs: metagenomics.py: - Groups class: Fix for when second column not present - Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular data uploads being sniffed as this type mothur_wrapper.py: - update cmd_dict['chimera.perseus'], to use correct inputs - add --beta option (needed for chimera.perseus tool) - add function for converting input floats from scientific notation (e.g. 1e-6, which mothur can't handle) to decimal format (e.g. 0.000001, which it can) chimera.perseus.xml: - add output data item for the "accnos" file (not previously captured in the history) trim.flows.xml: - cosmetic change: base name for additional output data items is now "trim.flows", rather than "logfile" (clarifies history items) shhh.flows.xml: - cosmetic change: update default value specified in help for mindiff to be consistent with that given in the mothur documentation
author Jim Johnson <jj@umn.edu>
date Wed, 04 Sep 2013 10:51:34 -0500
parents a3eed59297ea
children d53b9eb16c2d
files mothur/lib/galaxy/datatypes/metagenomics.py mothur/tools/mothur/chimera.perseus.xml mothur/tools/mothur/mothur_wrapper.py mothur/tools/mothur/shhh.flows.xml mothur/tools/mothur/trim.flows.xml
diffstat 5 files changed, 54 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/mothur/lib/galaxy/datatypes/metagenomics.py	Tue Jul 30 09:26:31 2013 -0500
+++ b/mothur/lib/galaxy/datatypes/metagenomics.py	Wed Sep 04 10:51:34 2013 -0500
@@ -689,7 +689,11 @@
             fh = open( dataset.file_name )
             for line in fh:
                 fields = line.strip().split('\t')
-                group_names.add(fields[1])
+                try:
+                    group_names.add(fields[1])
+                except IndexError:
+                    # Ignore missing 2nd column
+                    pass
             dataset.metadata.groups = []
             dataset.metadata.groups += group_names
         finally:
@@ -1202,6 +1206,9 @@
                         try:
                             for i in range(1, col_cnt):
                                 check = float(fields[i])
+                                # Check abs value is <= 1.0
+                                if abs(check) > 1.0:
+                                    return False
                                 # Also test for whether value is an integer
                                 try:
                                     check = int(fields[i])
--- a/mothur/tools/mothur/chimera.perseus.xml	Tue Jul 30 09:26:31 2013 -0500
+++ b/mothur/tools/mothur/chimera.perseus.xml	Wed Sep 04 10:51:34 2013 -0500
@@ -4,6 +4,7 @@
   mothur_wrapper.py 
   --cmd='chimera.perseus'
   --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.perseus\.chimeras?$:'$out_file
+  --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.perseus\.chimeras?$:'$out_file,'^\S+\.perseus\.accnos$:'$out_accnos
   --outputdir='$logfile.extra_files_path'
   --fasta=$fasta
   --name=$name
@@ -32,6 +33,7 @@
  <outputs>
   <data format="html" name="logfile" label="${tool.name} on ${on_string}: logfile" />
   <data format="tabular" name="out_file" label="${tool.name} on ${on_string}: perseus.chimeras" />
+  <data format="accnos" name="out_accnos" label="${tool.name} on ${on_string}: perseus.accnos" />
  </outputs>
  <requirements>
   <requirement type="package" version="1.27">mothur</requirement>
--- a/mothur/tools/mothur/mothur_wrapper.py	Tue Jul 30 09:26:31 2013 -0500
+++ b/mothur/tools/mothur/mothur_wrapper.py	Wed Sep 04 10:51:34 2013 -0500
@@ -19,7 +19,7 @@
  # Galaxy output dataset extra_files_path direcotry in which to put all output files
  --outputdir='/home/galaxy/data/database/files/002/dataset_2613_files'
  # The id of one of the galaxy outputs (e.g. the mothur logfile) used for dynamic dataset generation
- #  http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput
+ #  http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
  --datasetid='2578'
  # The galaxy directory in which to copy all output files for dynamic dataset generation
  --new_file_path='/home/galaxy/data/database/tmp'
@@ -120,13 +120,49 @@
                         shutil.copy(val,os.path.join(input_dir,fname))
                     vals.append(fname)
                     if debug: print >> sys.stderr, "scp %s %s" % (val, os.path.join(input_dir,fname))
-                else: 
-                    vals.append(val)
+                else:
+                    vals.append(convert_value(val))
             return '-'.join(vals)
-        return value
+        return convert_value(value)
+    # Ensure parameter values are in a format that mothur can handle
+    def convert_value(value):
+        """
+        Convert a parameter value to a format suitable for input to mothur.
+
+        Returns None for None, an int for integer text, a float for plain
+        decimal text, a fixed-point decimal string when the float would
+        otherwise print in scientific notation (e.g. 1e-6, which mothur
+        can't handle), and str(value) for anything non-numeric.
+        """
+        if value is None:
+            return None
+        text = str(value)
+        try:
+            return int(text)
+        except ValueError:
+            pass
+        try:
+            number = float(text)
+        except ValueError:
+            # Neither integer nor float: hand back the text unchanged
+            return text
+        mantissa, marker, exponent = str(number).partition('e')
+        if not marker:
+            # Already prints in plain decimal form
+            return number
+        # Decimal places needed = mantissa fraction digits minus exponent,
+        # so 1.5e-06 gets 7 places ('0.0000015') instead of being rounded
+        # to 6.  A non-positive count (large magnitudes, either sign)
+        # means the expanded value has no fractional digits at all.
+        places = len(mantissa.partition('.')[2]) - int(exponent)
+        return '%.*f' % (max(places, 0), number)
     #Parse Command Line and get a list of params
     # Prefix is used to differentiate between prerequisite commands: read.otu, read.dist, read.tree
     def get_params(cmd, options, input_dir, prefix=''):
+        """
+        Gather parameter values for the specified mothur command 'cmd',
+        using the definition from the 'cmd_dict' dictionary.
+        """
         if debug: print >> sys.stderr, options
         params = []  
         for opt in cmd_dict[cmd]['required']:
@@ -185,7 +221,7 @@
     cmd_dict['chimera.bellerophon'] = dict({'required' : ['fasta'], 'optional' : ['filter','correction','window','increment','processors']})
     cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','reference'], 'optional' : ['filter','mask','window','numwanted','save','processors']})
     cmd_dict['chimera.check'] = dict({'required' : ['fasta','reference'], 'optional' : ['ksize','svg','name','increment','save','processors']})
-    cmd_dict['chimera.perseus'] = dict({'required' : ['fasta','name'], 'optional' : ['group','sigma']})
+    cmd_dict['chimera.perseus'] = dict({'required' : ['fasta','name'], 'optional' : ['group','alpha','beta','cutoff']})
     cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','reference'], 'optional' : ['conservation','quantile','filter','mask','window','increment','save','processors']})
     cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','group','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']})
     cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : ['name','group','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']})
@@ -484,6 +520,7 @@
     parser.add_option( '--matrixmodel', dest='matrixmodel', help='' )
     parser.add_option( '--epsilon', dest='epsilon', help='' )
     parser.add_option( '--alpha', dest='alpha', help='' )
+    parser.add_option( '--beta', dest='beta', help='' )
     parser.add_option( '--root', dest='root', help='' )
     parser.add_option( '--axes', dest='axes', help='table of name column followed by columns of axis values' )
     parser.add_option( '--numaxes', dest='numaxes', help='the number of axes' )
--- a/mothur/tools/mothur/shhh.flows.xml	Tue Jul 30 09:26:31 2013 -0500
+++ b/mothur/tools/mothur/shhh.flows.xml	Wed Sep 04 10:51:34 2013 -0500
@@ -56,7 +56,7 @@
   </param>
 
   <param name="mindelta" type="float" value="" optional="true" label="mindelta - threshold for determining how much change in the flowgram correction is allowed" 
-         help="default .0000001 (10^-6)">
+         help="default .000001 (10^-6)">
    <validator type="in_range" message="mindelta between 0. and .1" min="0.0" max="0.1"/>
   </param>
 
--- a/mothur/tools/mothur/trim.flows.xml	Tue Jul 30 09:26:31 2013 -0500
+++ b/mothur/tools/mothur/trim.flows.xml	Wed Sep 04 10:51:34 2013 -0500
@@ -44,7 +44,7 @@
    #if $oligo.sdiffs.__str__ != '' and int($oligo.sdiffs.__str__) > 0:
     --sdiffs=$oligo.sdiffs
    #end if
-   --datasetid='$logfile.id' --new_file_path='$__new_file_path__'
+   --datasetid='$trim_flow.id' --new_file_path='$__new_file_path__'
    --new_datasets='^\S+?\.(\S+\.flow)$:sff.flow'
   #end if
   $fasta