diff mothur/tools/mothur/mothur_wrapper.py @ 27:49058b1f8d3f

Update to mothur version 1.27 and add tool_dependencies.xml to automatically install mothur
author Jim Johnson <jj@umn.edu>
date Tue, 04 Dec 2012 11:05:19 -0600
parents 5c77423823cb
children 7238483c96fa
line wrap: on
line diff
--- a/mothur/tools/mothur/mothur_wrapper.py	Wed May 16 13:12:05 2012 -0500
+++ b/mothur/tools/mothur/mothur_wrapper.py	Tue Dec 04 11:05:19 2012 -0600
@@ -4,7 +4,7 @@
 http://www.mothur.org/
 
 Supports mothur version 
-mothur v.1.24.0
+mothur v.1.27.0
 
 Class encapsulating Mothur galaxy tool.
 Expect each invocation to include:
@@ -195,10 +195,10 @@
     cmd_dict['classify.tree'] = dict({'required' : ['taxonomy','tree'],'optional' : ['name','group','cutoff']})
     #clear.memory ## not needed in galaxy framework
     cmd_dict['clearcut'] = dict({'required' : [['phylip','fasta']],'optional' : ['seed','norandom','shuffle','neighbor','expblen','expdist','ntrees','matrixout','kimura','jukes','protein','DNA']})
-    cmd_dict['cluster'] = dict({'required' : [['phylip','column']] ,  'optional' : ['name','method','cutoff','hard','precision','sim','showabund','timing']})
-    cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] ,  'optional' : ['name','method','cutoff','hard','sim','precision']})
+    cmd_dict['cluster'] = dict({'required' : [['phylip','column']] ,  'optional' : ['name','count','method','cutoff','hard','precision','sim','showabund','timing']})
+    cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] ,  'optional' : ['name','count','method','cutoff','hard','sim','precision']})
     cmd_dict['cluster.fragments'] = dict({'required' : ['fasta'] ,  'optional' : ['name','diffs','percent']})
-    cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] ,  'optional' : ['name','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','timing','processors']})
+    cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] ,  'optional' : ['name','count','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','classic','timing','processors']})
     cmd_dict['collect.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','freq','groups','all']})
     cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']})
     cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label','cutoff']})
@@ -206,10 +206,10 @@
     cmd_dict['cooccurrence'] = dict({'required' : ['shared'], 'optional' : ['iters','metric','matrixmodel','groups','label']})
 
     cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']})
-    cmd_dict['count.groups'] = dict({'required' : ['group','shared'], 'optional' : ['accnos','groups']})
-    cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups']})
+    cmd_dict['count.groups'] = dict({'required' : [['group','shared','count']], 'optional' : ['accnos','groups']})
+    cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups','large']})
 
-    cmd_dict['create.database'] = dict({'required' : ['list, repfasta','repname','contaxonomy'], 'optional' : ['group','label']})
+    cmd_dict['create.database'] = dict({'required' : [['list','shared'],'repfasta','repname','contaxonomy'], 'optional' : ['group','label']})
 
     cmd_dict['degap.seqs'] = dict({'required' : ['fasta']})
     cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'],  'optional' : []})
@@ -238,6 +238,7 @@
     cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]})
 
     cmd_dict['make.biom'] = dict({'required' : ['shared'] ,  'optional' : ['contaxonomy','matrixtype','groups','label']})
+    cmd_dict['make.contigs'] = dict({'required' : ['ffastq','rfastq',], 'optional' : ['align','match','mismatch','gapopen','gapextend','threshold','oligos','bdiffs','pdiffs','tdiffs','processors']})
 
     cmd_dict['make.fastq'] = dict({'required' : ['fasta','qfile'] ,  'optional' : []})
     cmd_dict['make.group'] = dict({'required' : ['fasta','groups'],  'optional' : []})
@@ -248,7 +249,7 @@
     cmd_dict['metastats'] = dict({'required' : ['shared','design'],  'optional' : ['groups', 'label','iters','threshold','sets','processors']})
     cmd_dict['nmds'] = dict({'required' : ['phylip'], 'optional' : ['axes','mindim','maxdim','iters','maxiters','epsilon']})
     cmd_dict['normalize.shared'] = dict({'required' : [['shared','relabund']], 'optional' : ['label','method','norm','groups','makerelabund']})
-    cmd_dict['otu.association'] = dict({'required' : [['shared','relabund']], 'optional' : ['groups', 'label','method']})
+    cmd_dict['otu.association'] = dict({'required' : [['shared','relabund']], 'optional' : ['groups', 'label','method','metadata']})
     cmd_dict['otu.hierarchy'] = dict({'required' : ['list','label'], 'optional' : ['output']})
     cmd_dict['pairwise.seqs'] = dict({'required' : ['fasta'],  'optional' : ['align','calc','countends','output','cutoff','match','mismatch','gapopen','gapextend','processors']})
     cmd_dict['parse.list'] = dict({'required' : ['list','group'], 'optional' : ['label']})
@@ -261,7 +262,7 @@
     cmd_dict['phylo.diversity'] = dict({'required' : ['tree'],'optional' : ['group','name','groups','iters','freq','scale','rarefy','collect','summary','processors']})
     cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']})
     cmd_dict['pre.cluster'] = dict({'required' : ['fasta'],  'optional' : ['name','diffs','group','processors']})
-    cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble']})
+    cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble','design','sets','groupmode']})
     cmd_dict['rarefaction.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','iters','label','freq','processors']})
     cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','shared','taxonomy','design']})
     cmd_dict['remove.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']})
@@ -273,7 +274,7 @@
     cmd_dict['sens.spec'] = dict({'required' : ['list',['column','phylip']] , 'optional' : ['label','cutoff','hard','precision']})
     cmd_dict['seq.error'] = dict({'required' : ['fasta','reference'] , 'optional' : ['name','qfile','report','ignorechimeras','threshold','processors']})
     cmd_dict['sffinfo'] = dict({'required' : [['sff','sfftxt']], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']})
-    cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','processors']})
+    cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','large','processors']})
     cmd_dict['shhh.seqs'] = dict({'required' : [['fasta','files']], 'optional' : ['group','sigma','processors']})
     cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']})
     cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : ['name','groups']})
@@ -331,6 +332,8 @@
     parser.add_option( '--numbases', dest='numbases', type="int", help='Number of base to allow' )
     parser.add_option( '--fasta', dest='fasta', help='fasta file paths' )
     parser.add_option( '--fastq', dest='fastq', help='fastq file paths' )
+    parser.add_option( '--ffastq', dest='ffastq', help='forward fastq file' )
+    parser.add_option( '--rfastq', dest='rfastq', help='reverse fastq file' )
     parser.add_option( '--qfile', dest='qfile', help='Sequence read quality file (454 platform)' )
     parser.add_option( '--repfasta', dest='repfasta', help='fasta file paths' )
     parser.add_option( '--qaverage', dest='qaverage', type="int", help='Remove sequences that have an average quality below the value' )
@@ -401,6 +404,7 @@
     parser.add_option( '--soft', dest='soft', type='int', help='Soft Mask - percentage required to retain column. (0-100)' )
     parser.add_option( '--hard', dest='hard', help='Hard Column Filter - A file should only contain one line consisting of 0 and 1 chars' )
     parser.add_option( '--calc', dest='calc', help='Calc Method - Gap Penality' )
+    parser.add_option( '--count', dest='count',  help='Count file' )
     parser.add_option( '--countends', dest='countends',  help='Penalize terminal gaps' )
     parser.add_option( '--cutoff', dest='cutoff', help='Distance Cutoff threshold, discard larger distances' )
     parser.add_option( '--countgaps', dest='countgaps',  help='count gaps as bases' )
@@ -515,6 +519,7 @@
     parser.add_option( '--matrixtype', dest='matrixtype',  help='' )
     parser.add_option( '--consensus', dest='consensus',  help='boolean' )
     parser.add_option( '--biom', dest='biom',  help='biom file' )
+    parser.add_option( '--classic', dest='classic',  help='boolean' )
     # include read.otu options
     parser.add_option( '--rabund', dest='rabund', help='' )
     parser.add_option( '--sabund', dest='sabund', help='' )
@@ -653,10 +658,21 @@
                 if re.match(pattern,line):
                     chimera_count += 1
             info += "Chimeras: %d" % chimera_count
+        elif options.cmd == 'count.groups':
+            fh = open(os.path.join(outputdir,'tmp.groups.count'),'w')
+            for line in tmp_stdout:
+                m = re.match('(.+) contains (\d+)\.',line)
+                if m and len(m.groups()) == 2:
+                    info += line  
+                    print >> fh, "%s\t%s" % (m.group(1),m.group(2))
+            fh.close()
         else:
             found_begin = False
             info_chars = 0
             for line in tmp_stdout:
+                if re.match('mothur > ' + options.cmd + '\(.*\)', line):
+                    found_begin = True
+                    continue
                 if line.find(outputdir) >= 0:
                     continue
 	        if line.startswith('**************'):
@@ -671,18 +687,19 @@
                     continue
                 if re.match('.*\.\.\.\s*$',line):
                     continue
-                if re.match('^\d*\s*$',line):
+                if re.match('^\d*\s*$',line) and line.find(' contains ') < 0:
                     continue
                 # if re.match('^(unique|[0-9.]*)(\t\d+)+',line):  # abundance from cluster commands 
-                if not options.cmd.startswith('unifrac') and re.match('^(unique|[0-9.]+)(\t\d+)*',line):  # abundance from cluster commands, allow unique line into info
+                if (not (options.cmd.startswith('unifrac') or options.cmd.startswith('count.groups')) 
+                    and re.match('^(unique|[0-9.]+)(\t\d+)*',line)):  # abundance from cluster commands, allow unique line into info
                     continue
                 if re.match('Output .*',line):
                     break
+                if re.match('mothur > quit\(\)',line):
+                    break
                 if found_begin and info_chars < 200:
                     info += "%s" % line
                     info_chars += len(line)
-                if re.match('mothur > ' + options.cmd + '\(.*\)', line):
-                    found_begin = True
         tmp_stdout.close()
         print >> sys.stdout, info
         # Collect output files