# HG changeset patch # User Jim Johnson # Date 1378309894 18000 # Node ID ec8df51e841a5bdbb2300bca74f78de385f6092a # Parent a3eed59297eaa54a67232e89cfc8bd9392b6eb8a Fixes courtesy of Peter Briggs: metagenomics.py: - Groups class: Fix for when second column not present - Axes class: make 'sniff' method more sensitive to try and restrict arbitrary tabular data uploads being sniffed as this type mothur_wrapper.py: - update cmd_dict['chimera.perseus'] to use correct inputs - add --beta option (needed for chimera.perseus tool) - add function for converting input floats from scientific notation (e.g. 1e-6, which mothur can't handle) to decimal format (e.g. 0.000001, which it can) chimera.perseus.xml: - add output data item for the "accnos" file (not previously captured in the history) trim.flows.xml: - cosmetic change: base name for additional output data items is now "trim.flows", rather than "logfile" (clarifies history items) shhh.flows.xml: - cosmetic change: update default value specified in help for mindiff to be consistent with that given in the mothur documentation diff -r a3eed59297ea -r ec8df51e841a mothur/lib/galaxy/datatypes/metagenomics.py --- a/mothur/lib/galaxy/datatypes/metagenomics.py Tue Jul 30 09:26:31 2013 -0500 +++ b/mothur/lib/galaxy/datatypes/metagenomics.py Wed Sep 04 10:51:34 2013 -0500 @@ -689,7 +689,11 @@ fh = open( dataset.file_name ) for line in fh: fields = line.strip().split('\t') - group_names.add(fields[1]) + try: + group_names.add(fields[1]) + except IndexError: + # Ignore missing 2nd column + pass dataset.metadata.groups = [] dataset.metadata.groups += group_names finally: @@ -1202,6 +1206,9 @@ try: for i in range(1, col_cnt): check = float(fields[i]) + # Check abs value is <= 1.0 + if abs(check) > 1.0: + return False # Also test for whether value is an integer try: check = int(fields[i]) diff -r a3eed59297ea -r ec8df51e841a mothur/tools/mothur/chimera.perseus.xml --- a/mothur/tools/mothur/chimera.perseus.xml Tue Jul 30 09:26:31 2013 
-0500 +++ b/mothur/tools/mothur/chimera.perseus.xml Wed Sep 04 10:51:34 2013 -0500 @@ -4,6 +4,7 @@ mothur_wrapper.py --cmd='chimera.perseus' --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.perseus\.chimeras?$:'$out_file + --result='^mothur.\S+\.logfile$:'$logfile,'^\S+\.perseus\.chimeras?$:'$out_file,'^\S+\.perseus\.accnos$:'$out_accnos --outputdir='$logfile.extra_files_path' --fasta=$fasta --name=$name @@ -32,6 +33,7 @@ + mothur diff -r a3eed59297ea -r ec8df51e841a mothur/tools/mothur/mothur_wrapper.py --- a/mothur/tools/mothur/mothur_wrapper.py Tue Jul 30 09:26:31 2013 -0500 +++ b/mothur/tools/mothur/mothur_wrapper.py Wed Sep 04 10:51:34 2013 -0500 @@ -19,7 +19,7 @@ # Galaxy output dataset extra_files_path direcotry in which to put all output files --outputdir='/home/galaxy/data/database/files/002/dataset_2613_files' # The id of one of the galaxy outputs (e.g. the mothur logfile) used for dynamic dataset generation - # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput + # http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files --datasetid='2578' # The galaxy directory in which to copy all output files for dynamic dataset generation --new_file_path='/home/galaxy/data/database/tmp' @@ -120,13 +120,49 @@ shutil.copy(val,os.path.join(input_dir,fname)) vals.append(fname) if debug: print >> sys.stderr, "scp %s %s" % (val, os.path.join(input_dir,fname)) - else: - vals.append(val) + else: + vals.append(convert_value(val)) return '-'.join(vals) - return value + return convert_value(value) + # Ensure parameter values are in a format that mothur can handle + def convert_value(value): + """ + Convert parameter values to a format suitable for input to mothur + (specifically floating point numbers supplied in scientific + notation) + """ + if value is None: + # Return None + x = None + else: + x = str(value) + # Integer + try: + x = int(x) + except ValueError: + # Float + try: + x = float(x) + if str(x).count('e'): + # Ugly hacks to convert 
scientific notation (which + # mothur can't handle) into decimal format + places = int(str(x).split('e')[1].lstrip('-')) + if x < 1.0: + x = '%.*f' % (int(places),x) + else: + x = '%*.f' % (int(places),x) + except ValueError: + # Neither integer nor float + pass + # Return whatever we finished up with + return x #Parse Command Line and get a list of params # Prefix is used to differentiate between prerequisite commands: read.otu, read.dist, read.tree def get_params(cmd, options, input_dir, prefix=''): + """ + Gather parameter values for the specified mothur command 'cmd', + using the definition from the 'cmd_dict' dictionary. + """ if debug: print >> sys.stderr, options params = [] for opt in cmd_dict[cmd]['required']: @@ -185,7 +221,7 @@ cmd_dict['chimera.bellerophon'] = dict({'required' : ['fasta'], 'optional' : ['filter','correction','window','increment','processors']}) cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','reference'], 'optional' : ['filter','mask','window','numwanted','save','processors']}) cmd_dict['chimera.check'] = dict({'required' : ['fasta','reference'], 'optional' : ['ksize','svg','name','increment','save','processors']}) - cmd_dict['chimera.perseus'] = dict({'required' : ['fasta','name'], 'optional' : ['group','sigma']}) + cmd_dict['chimera.perseus'] = dict({'required' : ['fasta','name'], 'optional' : ['group','alpha','beta','cutoff']}) cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','reference'], 'optional' : ['conservation','quantile','filter','mask','window','increment','save','processors']}) cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','group','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','blastlocation','save','processors']}) cmd_dict['chimera.uchime'] = dict({'required' : ['fasta'], 'optional' : 
['name','group','reference','abskew','chimealns','minh','mindiv','xn','dn','xa','chunks','minchunk','idsmoothwindow','minsmoothid','maxp','skipgaps','skipgaps2','minlen','maxlen','ucl','queryfract','processors']}) @@ -484,6 +520,7 @@ parser.add_option( '--matrixmodel', dest='matrixmodel', help='' ) parser.add_option( '--epsilon', dest='epsilon', help='' ) parser.add_option( '--alpha', dest='alpha', help='' ) + parser.add_option( '--beta', dest='beta', help='' ) parser.add_option( '--root', dest='root', help='' ) parser.add_option( '--axes', dest='axes', help='table of name column followed by columns of axis values' ) parser.add_option( '--numaxes', dest='numaxes', help='the number of axes' ) diff -r a3eed59297ea -r ec8df51e841a mothur/tools/mothur/shhh.flows.xml --- a/mothur/tools/mothur/shhh.flows.xml Tue Jul 30 09:26:31 2013 -0500 +++ b/mothur/tools/mothur/shhh.flows.xml Wed Sep 04 10:51:34 2013 -0500 @@ -56,7 +56,7 @@ + help="default .000001 (10^-6)"> diff -r a3eed59297ea -r ec8df51e841a mothur/tools/mothur/trim.flows.xml --- a/mothur/tools/mothur/trim.flows.xml Tue Jul 30 09:26:31 2013 -0500 +++ b/mothur/tools/mothur/trim.flows.xml Wed Sep 04 10:51:34 2013 -0500 @@ -44,7 +44,7 @@ #if $oligo.sdiffs.__str__ != '' and int($oligo.sdiffs.__str__) > 0: --sdiffs=$oligo.sdiffs #end if - --datasetid='$logfile.id' --new_file_path='$__new_file_path__' + --datasetid='$trim_flow.id' --new_file_path='$__new_file_path__' --new_datasets='^\S+?\.(\S+\.flow)$:sff.flow' #end if $fasta