Mercurial > repos > jjohnson > mothur_toolsuite
diff mothur/tools/mothur/mothur_wrapper.py @ 2:e990ac8a0f58
Migrated tool version 1.19.0 from old tool shed archive to new tool shed repository
author | jjohnson |
---|---|
date | Tue, 07 Jun 2011 17:39:06 -0400 |
parents | fcc0778f6987 |
children | e2e2071d2c62 |
line wrap: on
line diff
--- a/mothur/tools/mothur/mothur_wrapper.py Tue Jun 07 17:35:35 2011 -0400 +++ b/mothur/tools/mothur/mothur_wrapper.py Tue Jun 07 17:39:06 2011 -0400 @@ -4,7 +4,7 @@ http://www.mothur.org/ Supports mothur version -mothur v.1.15.0 +mothur v.1.17.0 Class encapsulating Mothur galaxy tool. Expect each invocation to include: @@ -46,7 +46,7 @@ debug = False #debug = True -max_processors = 1 +max_processors = 2 def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) @@ -54,6 +54,8 @@ def __main__(): # tranform the logfile into html + # add extra file ouput + # add object tags for svg files def logfile_to_html(logfile_path,htmlfile_path,tmp_input_dir_name,tmp_output_dir_name,title="Mothur Logfile"): if debug: print >> sys.stdout, 'logfile_to_html %s -> %s' % (logfile_path, htmlfile_path) if debug: print >> sys.stdout, 'logfile_to_html input_dir: %s' % tmp_input_dir_name @@ -69,10 +71,18 @@ continue elif line.find('put directory to ') >= 0: continue + elif line.startswith('Mothur\'s directories:') : + continue + elif line.startswith('outputDir=') : + continue elif line.startswith('Type ') : continue elif line.find(tmp_output_dir_name) >= 0: - line = re.sub(out_pat,'<a href="\\1">\\1</a>',line) + # if debug: print >> sys.stdout, 'logfile_to_html #%s#' % line + if line.strip().endswith('.svg'): + line = re.sub(out_pat,' <object id="object" type="image/svg+xml" data="\\1">\\1</object> <br><a href="\\1">\\1</a> <hr/>',line) + else: + line = re.sub(out_pat,'<a href="\\1">\\1</a>',line) elif line.find(tmp_input_dir_name) >= 0: line = re.sub(in_pat,'\\1',line) html.write(line) @@ -165,6 +175,146 @@ The complexity of inputs should be handled by the glaxy tool xml file. """ cmd_dict = dict() + cmd_dict['align.check'] = dict({'required' : ['fasta','map']}) + #cmd_dict['align.seqs'] = dict({'required' : ['candidate','template'], 'optional' : ['search','ksize','align','match','mismatch','gapopen','gapextend','flip','threshold','processors']}) + cmd_dict['align.seqs'] = dict({'required' : ['fasta','reference',], 'optional' : ['search','ksize','align','match','mismatch','gapopen','gapextend','flip','threshold','processors']}) + cmd_dict['amova'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + cmd_dict['anosim'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + #cmd_dict['bin.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name','label','group']}) + cmd_dict['bin.seqs'] = dict({'required' : ['list','fasta'], 'optional' : ['name','label','group']}) + #cmd_dict['bootstrap.shared'] = dict({'required' : [], 'optional' : ['calc','groups','iters','label']}) + cmd_dict['bootstrap.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','groups','iters','label']}) + #catchall + cmd_dict['chimera.bellerophon'] = dict({'required' : ['fasta'], 'optional' : ['filter','correction','window','increment','processors']}) + #cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','template'], 'optional' : ['filter','mask','window','numwanted','processors']}) + cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','reference'], 'optional' : ['filter','mask','window','numwanted','processors']}) + #cmd_dict['chimera.check'] = dict({'required' : ['fasta','template'], 'optional' : ['ksize','svg','name','increment','processors']}) + cmd_dict['chimera.check'] = dict({'required' : ['fasta','reference'], 'optional' : ['ksize','svg','name','increment','processors']}) + #cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','template'], 'optional' : ['conservation','quantile','filter','mask','window','increment','processors']}) + cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','reference'], 'optional' : ['conservation','quantile','filter','mask','window','increment','processors']}) + #cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','template'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','processors']}) + cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','reference'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','processors']}) + #cmd_dict['chop.seqs'] = dict({'required' : ['fasta','numbases'], 'optional' : ['keep','short']}) + cmd_dict['chop.seqs'] = dict({'required' : ['fasta','numbases'], 'optional' : ['countgaps','keep','short']}) + cmd_dict['classify.otu'] = dict({'required' : ['list','taxonomy'],'optional' : ['name','cutoff','label','group','probs','basis','reftaxonomy']}) + #cmd_dict['classify.seqs'] = dict({'required' : ['fasta','template','taxonomy'],'optional' : ['name','search','ksize','method','match','mismatch','gapopen','gapextend','numwanted','probs','processors']}) + cmd_dict['classify.seqs'] = dict({'required' : ['fasta','reference','taxonomy'],'optional' : ['name','search','ksize','method','match','mismatch','gapopen','gapextend','numwanted','probs','processors']}) + cmd_dict['clearcut'] = dict({'required' : [['phylip','fasta']],'optional' : ['seed','norandom','shuffle','neighbor','expblen','expdist','ntrees','matrixout','kimura','jukes','protein','DNA']}) + #cmd_dict['cluster'] = dict({'required' : [] , 'optional' : ['method','cutoff','hard','precision']}) + cmd_dict['cluster'] = dict({'required' : [['phylip','column']] , 'optional' : ['name','method','cutoff','hard','precision','sim','showabund','timing']}) + #cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] , 'optional' : ['method','cutoff','hard','precision']}) + cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] , 'optional' : ['name','method','cutoff','hard','sim','precision']}) + cmd_dict['cluster.fragments'] = dict({'required' : ['fasta'] , 'optional' : ['name','diffs','percent']}) + cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] , 'optional' : ['name','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','timing','processors']}) + #cmd_dict['collect.shared'] = dict({'required' : [], 'optional' : ['calc','label','freq','groups','all']}) + cmd_dict['collect.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','freq','groups','all']}) + #cmd_dict['collect.single'] = dict({'required' : [], 'optional' : ['calc','abund','size','label','freq']}) + cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']}) + cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label']}) + cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']}) + cmd_dict['degap.seqs'] = dict({'required' : ['fasta']}) + cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'], 'optional' : []}) + #cmd_dict['dist.seqs'] = dict({'required' : ['fasta'], 'optional' : ['calc','countends','output','cutoff','processors']}) + cmd_dict['dist.seqs'] = dict({'required' : ['fasta'], 'optional' : ['calc','countends','output','cutoff','oldfasta','column','processors']}) + #cmd_dict['dist.shared'] = dict({'required' : [], 'optional' : ['calc','label','groups','output']}) + cmd_dict['dist.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','output']}) + cmd_dict['fastq.info'] = dict({'required' : ['fastq'], 'optional' : []}) + cmd_dict['filter.seqs'] = dict({'required' : ['fasta'], 'optional' : ['vertical','trump','soft','hard','processors']}) + #cmd_dict['get.group'] = dict({'required' : [], 'optional' : []}) + cmd_dict['get.group'] = dict({'required' : ['shared'], 'optional' : []}) + cmd_dict['get.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','taxonomy']}) + cmd_dict['get.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']}) + ##cmd_dict['get.otulist'] = dict({'required' : [], 'optional' : []}) + cmd_dict['get.otulist'] = dict({'required' : ['list'], 'optional' : ['label','sort']}) + #cmd_dict['get.oturep'] = dict({'required' : ['fasta','list'], 'optional' : ['phylip','column','name','label','group','groups','sorted','precision','cutoff','large','weighted']}) + cmd_dict['get.oturep'] = dict({'required' : ['fasta','list',['phylip','column']], 'optional' : ['name','label','group','groups','sorted','precision','cutoff','large','weighted']}) + cmd_dict['get.otus'] = dict({'required' : ['group','list','label'], 'optional' : ['groups','accnos']}) + ##cmd_dict['get.rabund'] = dict({'required' : [],'optional' : []}) + cmd_dict['get.rabund'] = dict({'required' : [['list','sabund']],'optional' : ['sorted','label']}) + #cmd_dict['get.relabund'] = dict({'required' : [],'optional' : ['scale','label','groups']}) + cmd_dict['get.relabund'] = dict({'required' : ['shared'],'optional' : ['scale','label','groups']}) + ##cmd_dict['get.sabund'] = dict({'required' : [],'optional' : []}) + cmd_dict['get.sabund'] = dict({'required' : [['list','rabund']],'optional' : ['label']}) + cmd_dict['get.seqs'] = dict({'required' : ['accnos',['fasta','qfile','name','group','list','alignreport','taxonomy']], 'optional' : ['dups']}) + ##cmd_dict['get.sharedseqs'] = dict({'required' : [], 'optional' : []}) + cmd_dict['get.sharedseqs'] = dict({'required' : ['list','group'], 'optional' : ['label', 'unique', 'shared', 'output', 'fasta']}) + cmd_dict['hcluster'] = dict({'required' : [['column','phylip']] , 'optional' : ['name','method','cutoff','hard','precision','sorted','showabund','timing']}) + #cmd_dict['heatmap.bin'] = dict({'required' : [], 'optional' : ['label','groups','scale','sorted','numotu','fontsize']}) + cmd_dict['heatmap.bin'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['label','groups','scale','sorted','numotu','fontsize']}) + #cmd_dict['heatmap.sim'] = dict({'required' : [], 'optional' : ['calc','phylip','column','name','label','groups']}) + cmd_dict['heatmap.sim'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['calc','name','label','groups']}) + cmd_dict['homova'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + cmd_dict['indicator'] = dict({'required' : ['tree',['shared','relabund']], 'optional' : ['groups','label','design']}) + #cmd_dict['libshuff'] = dict({'required' : [],'optional' : ['iters','form','step','cutoff']}) + cmd_dict['libshuff'] = dict({'required' : ['phylip','group'],'optional' : ['groups','iters','form','sim','step','cutoff']}) + cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]}) + cmd_dict['make,fastq'] = dict({'required' : ['fasta','qfile'] , 'optional' : []}) + #cmd_dict['make.group'] = dict({'required' : ['fasta','groups'], 'optional' : ['output']}) + cmd_dict['make.group'] = dict({'required' : ['fasta','groups'], 'optional' : []}) + cmd_dict['make.shared'] = dict({'required' : ['list','group'], 'optional' : ['label','groups','ordergroup']}) + cmd_dict['mantel'] = dict({'required' : ['phylip','phylip2'] , 'optional' : ['method','iters']}) + cmd_dict['merge.files'] = dict({'required' : ['input','output']}) + cmd_dict['merge.groups'] = dict({'required' : ['shared','design'], 'optional' : ['groups', 'label']}) + #cmd_dict['metastats'] = dict({'required' : ['design'], 'optional' : ['groups', 'label','iters','threshold','sets','processors']}) + cmd_dict['metastats'] = dict({'required' : ['shared','design'], 'optional' : ['groups', 'label','iters','threshold','sets','processors']}) + cmd_dict['nmds'] = dict({'required' : ['phylip'], 'optional' : ['axes','mindim','maxdim','iters','maxiters','epsilon']}) + #cmd_dict['normalize.shared'] = dict({'required' : [], 'optional' : ['label','method','norm','groups']}) + cmd_dict['normalize.shared'] = dict({'required' : [['shared','relabund']], 'optional' : ['label','method','norm','groups','makerelabund']}) + ##cmd_dict['otu.hierarchy'] = dict({'required' : [], 'optional' : []}) + cmd_dict['otu.hierarchy'] = dict({'required' : ['list','label'], 'optional' : ['output']}) + cmd_dict['pairwise.seqs'] = dict({'required' : ['fasta'], 'optional' : ['align','calc','countends','output','cutoff','match','mismatch','gapopen','gapextend','processors']}) + cmd_dict['parse.list'] = dict({'required' : ['list','group'], 'optional' : ['label']}) + #cmd_dict['parsimony'] = dict({'required' : [], 'optional' : ['groups','iters','random','processors']}) + cmd_dict['parsimony'] = dict({'required' : ['tree'], 'optional' : ['group','groups','name','iters','random','processors']}) + #cmd_dict['pca'] = dict({'required' : [], 'optional' : ['label','groups','metric']}) + cmd_dict['pca'] = dict({'required' : [['shared','relabund']], 'optional' : ['label','groups','metric']}) + #cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : []}) + cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : ['metric']}) + #cmd_dict['phylo.diversity'] = dict({'required' : [],'optional' : ['groups','iters','freq','scale','rarefy','collect','summary','processors']}) + cmd_dict['phylo.diversity'] = dict({'required' : ['tree','group'],'optional' : ['name','groups','iters','freq','scale','rarefy','collect','summary','processors']}) + cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']}) + #cmd_dict['pre.cluster'] = dict({'required' : ['fasta'], 'optional' : ['names','diffs']}) + cmd_dict['pre.cluster'] = dict({'required' : ['fasta'], 'optional' : ['name','diffs']}) + #cmd_dict['rarefaction.shared'] = dict({'required' : [], 'optional' : ['label','iters','groups','jumble']}) + cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble']}) + #cmd_dict['rarefaction.single'] = dict({'required' : [], 'optional' : ['calc','abund','iters','label','freq','processors']}) + cmd_dict['rarefaction.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','iters','label','freq','processors']}) + #cmd_dict['read.dist'] = dict({'required' : [['phylip','column']], 'optional' : ['name','cutoff','hard','precision','sim','group']}) + #cmd_dict['read.otu'] = dict({'required' : [['rabund','sabund','list','shared','relabund']], 'optional' : ['label','group','groups','ordergroup']}) + #cmd_dict['read.tree'] = dict({'required' : ['tree'], 'optional' : ['name','group']}) + cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','taxonomy']}) + cmd_dict['remove.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']}) + cmd_dict['remove.otus'] = dict({'required' : ['group','list','label'], 'optional' : ['groups','accnos']}) + #cmd_dict['remove.rare'] = dict({'required' : [['list','sabund','rabund','shared'],'nseqs'], 'optional' : ['group','groups','label','bygroup']}) + cmd_dict['remove.rare'] = dict({'required' : [['list','sabund','rabund','shared'],'nseqs'], 'optional' : ['group','groups','label','bygroup']}) + cmd_dict['remove.seqs'] = dict({'required' : ['accnos',['fasta','qfile','name','group','list','alignreport','taxonomy']], 'optional' : ['dups']}) + cmd_dict['reverse.seqs'] = dict({'required' : ['fasta']}) + cmd_dict['screen.seqs'] = dict({'required' : ['fasta'], 'optional' : ['start','end','maxambig','maxhomop','minlength','maxlength','criteria','optimize','name','group','alignreport','processors']}) + cmd_dict['sens.spec'] = dict({'required' : ['list',['column','phylip']] , 'optional' : ['label','cutoff','hard','precision']}) + cmd_dict['sffinfo'] = dict({'required' : [['sff','sfftxt']], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']}) + cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']}) + #cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : []}) + cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : ['name','groups']}) + cmd_dict['sub.sample'] = dict({'required' : [['fasta','list','sabund','rabund','shared']], 'optional' : ['name','group','groups','label','size','persample']}) + #cmd_dict['summary.seqs'] = dict({'required' : ['fasta'],'outputs' : ['names']}) + cmd_dict['summary.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name','processors']}) + #cmd_dict['summary.shared'] = dict({'required' : [], 'optional' : ['calc','label','groups','all','distance']}) + cmd_dict['summary.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','groups','all','distance','processors']}) + #cmd_dict['summary.single'] = dict({'required' : [], 'optional' : ['calc','abund','size','label','groupmode']}) + cmd_dict['summary.single'] = dict({'required' : [['list','sabund','rabund','shared']], 'optional' : ['calc','abund','size','label','groupmode']}) + #cmd_dict['tree.shared'] = dict({'required' : [], 'optional' : ['groups','calc','cutoff','precision','label']}) + cmd_dict['tree.shared'] = dict({'required' : [['shared','phylip','column']], 'optional' : ['name','groups','calc','cutoff','precision','label']}) + cmd_dict['trim.seqs'] = dict({'required' : ['fasta'], 'optional' : ['group','oligos','qfile','qaverage','qthreshold','qtrim','flip','maxambig','maxhomop','minlength','maxlength','bdiffs','pdiffs','tdiffs','allfiles','keepfirst','removelast']}) + #cmd_dict['unifrac.unweighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','root','processors']}) + cmd_dict['unifrac.unweighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']}) + #cmd_dict['unifrac.weighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','root','processors']}) + cmd_dict['unifrac.weighted'] = dict({'required' : ['tree'], 'optional' : ['name','group','groups','iters','distance','random','root','processors']}) + #cmd_dict['unique.seqs'] = dict({'required' : ['fasta'], 'optional' : ['names']}) + cmd_dict['unique.seqs'] = dict({'required' : ['fasta'], 'optional' : ['name']}) + #cmd_dict['venn'] = dict({'required' : [], 'optional' : ['calc','label','groups','abund','nseqs','permute']}) + cmd_dict['venn'] = dict({'required' : [['list','shared']], 'optional' : ['calc','label','groups','abund','nseqs','permute']}) + ## + """ cmd_dict['merge.files'] = dict({'required' : ['input','output']}) cmd_dict['make.group'] = dict({'required' : ['fasta','groups'], 'optional' : ['output']}) cmd_dict['merge.groups'] = dict({'required' : ['shared','design'], 'optional' : ['groups', 'label']}) @@ -192,7 +342,7 @@ cmd_dict['chimera.ccode'] = dict({'required' : ['fasta','template'], 'optional' : ['filter','mask','window','numwanted','processors']}) cmd_dict['chimera.check'] = dict({'required' : ['fasta','template'], 'optional' : ['ksize','svg','name','increment','processors']}) cmd_dict['chimera.pintail'] = dict({'required' : ['fasta','template'], 'optional' : ['conservation','quantile','filter','mask','window','increment','processors']}) - cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','template'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','processors']}) + cmd_dict['chimera.slayer'] = dict({'required' : ['fasta','template'], 'optional' : ['name','search','window','increment','match','mismatch','numwanted','parents','minsim','mincov','iters','minbs','minsnp','divergence','realign','split','processors']}) cmd_dict['dist.seqs'] = dict({'required' : ['fasta'], 'optional' : ['calc','countends','output','cutoff','processors']}) cmd_dict['pairwise.seqs'] = dict({'required' : ['fasta'], 'optional' : ['align','calc','countends','output','cutoff','match','mismatch','gapopen','gapextend','processors']}) cmd_dict['read.dist'] = dict({'required' : [['phylip','column']], 'optional' : ['name','cutoff','hard','precision','sim','group']}) @@ -203,7 +353,7 @@ cmd_dict['cluster.fragments'] = dict({'required' : ['fasta'] , 'optional' : ['name','diffs','percent']}) cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] , 'optional' : ['name','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','timing','processors']}) cmd_dict['metastats'] = dict({'required' : ['design'], 'optional' : ['groups', 'label','iters','threshold','sets','processors']}) - cmd_dict['summary.single'] = dict({'required' : [], 'optional' : ['calc','abund','size','label','groupmode','processors']}) + cmd_dict['summary.single'] = dict({'required' : [], 'optional' : ['calc','abund','size','label','groupmode']}) cmd_dict['summary.shared'] = dict({'required' : [], 'optional' : ['calc','label','groups','all','distance']}) cmd_dict['collect.single'] = dict({'required' : [], 'optional' : ['calc','abund','size','label','freq']}) cmd_dict['collect.shared'] = dict({'required' : [], 'optional' : ['calc','label','freq','groups','all']}) @@ -214,21 +364,21 @@ cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']}) cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : []}) cmd_dict['tree.shared'] = dict({'required' : [], 'optional' : ['groups','calc','cutoff','precision','label']}) - cmd_dict['unifrac.unweighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','processors']}) - cmd_dict['unifrac.weighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','processors']}) + cmd_dict['unifrac.unweighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','root','processors']}) + cmd_dict['unifrac.weighted'] = dict({'required' : [], 'optional' : ['groups','iters','distance','random','root','processors']}) cmd_dict['parsimony'] = dict({'required' : [], 'optional' : ['groups','iters','random','processors']}) cmd_dict['sffinfo'] = dict({'required' : ['sff'], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']}) cmd_dict['fastq.info'] = dict({'required' : ['fastq'], 'optional' : []}) cmd_dict['heatmap.bin'] = dict({'required' : [], 'optional' : ['label','groups','scale','sorted','numotu','fontsize']}) cmd_dict['heatmap.sim'] = dict({'required' : [], 'optional' : ['calc','phylip','column','name','label','groups']}) - cmd_dict['venn'] = dict({'required' : [], 'optional' : ['calc','label','groups','nseqs','permute']}) + cmd_dict['venn'] = dict({'required' : [], 'optional' : ['calc','label','groups','abund','nseqs','permute']}) cmd_dict['pcoa'] = dict({'required' : ['phylip'], 'optional' : []}) cmd_dict['pca'] = dict({'required' : [], 'optional' : ['label','groups','metric']}) cmd_dict['nmds'] = dict({'required' : ['phylip'], 'optional' : ['axes','mindim','maxdim','iters','maxiters','epsilon']}) cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']}) cmd_dict['get.group'] = dict({'required' : [], 'optional' : []}) cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']}) - cmd_dict['phylo.diversity'] = dict({'required' : [],'optional' : ['groups','iters','freq','processors','scale','rarefy','collect','summary','processors']}) + cmd_dict['phylo.diversity'] = dict({'required' : [],'optional' : ['groups','iters','freq','scale','rarefy','collect','summary','processors']}) cmd_dict['get.oturep'] = dict({'required' : ['fasta','list'], 'optional' : ['phylip','column','name','label','group','groups','sorted','precision','cutoff','large','weighted']}) cmd_dict['get.relabund'] = dict({'required' : [],'optional' : ['scale','label','groups']}) cmd_dict['libshuff'] = dict({'required' : [],'optional' : ['iters','form','step','cutoff']}) @@ -237,10 +387,7 @@ cmd_dict['get.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']}) cmd_dict['remove.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']}) cmd_dict['bootstrap.shared'] = dict({'required' : [], 'optional' : ['calc','groups','iters','label']}) - """ - Mothur 1.15 - """ - cmd_dict['cluster.classic'] = dict({'required' : [] , 'optional' : ['method','cutoff','hard','precision']}) + cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] , 'optional' : ['method','cutoff','hard','precision']}) cmd_dict['get.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','taxonomy']}) cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','taxonomy']}) cmd_dict['get.otus'] = dict({'required' : ['group','list','label'], 'optional' : ['groups','accnos']}) @@ -251,6 +398,14 @@ cmd_dict['sub.sample'] = dict({'required' : [['fasta','list','sabund','rabund','shared']], 'optional' : ['name','group','groups','label','size','persample']}) cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label']}) cmd_dict['indicator'] = dict({'required' : ['tree',['shared','relabund']], 'optional' : ['groups','label','design']}) + + cmd_dict['amova'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + cmd_dict['homova'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + cmd_dict['anosim'] = dict({'required' : ['phylip','design'] , 'optional' : ['alpha','iters']}) + cmd_dict['mantel'] = dict({'required' : ['phylip','phylip2'] , 'optional' : ['method','iters']}) + cmd_dict['make,fastq'] = dict({'required' : ['fasta','qfile'] , 'optional' : []}) + """ + parser = optparse.OptionParser() # Options for managing galaxy interaction parser.add_option( '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) @@ -318,6 +473,7 @@ # parser.add_option( '--taxon', dest='taxon', action="callback", callback=remove_confidence_callback, help='A Taxon' ) parser.add_option( '--candidate', dest='candidate', help=' file ' ) parser.add_option( '--template', dest='template', help=' file ' ) + parser.add_option( '--reference', dest='reference', help=' file ' ) parser.add_option( '--dups', dest='dups', help='if True also apply to the aliases from the names files' ) parser.add_option( '--keep', dest='keep', help='Either front or back to specify the which end of the sequence to keep' ) parser.add_option( '--search', dest='search', help='Method for finding the template sequence: kmer, blast, suffix' ) @@ -353,6 +509,7 @@ parser.add_option( '--output', dest='output', help='Format for output' ) parser.add_option( '--method', dest='method', help='Method to use for analysis - cluster' ) parser.add_option( '--splitmethod', dest='splitmethod', help='Method to split a distance file - cluster.split' ) + parser.add_option( '--split', dest='split', help='Chimera split parameter, whether to detect trimeras and quadmeras' ) parser.add_option( '--abund', dest='abund', type='int', help='Threshold for rare to Abundant OTU classification' ) parser.add_option( '--size', dest='size', type='int', help='Size - sample size' ) parser.add_option( '--groupmode', dest='groupmode', help='Collate groups into one result table' ) @@ -373,12 +530,15 @@ parser.add_option( '--percent', dest='percent', type='int', help='(0-100 percent)' ) parser.add_option( '--divergence', dest='divergence', type='float', help='Divergence cutoff for chimera determination' ) parser.add_option( '--sff', dest='sff', help='Sff file' ) + parser.add_option( '--svg', dest='svg', help='SVG' ) parser.add_option( '--sfftxt', dest='sfftxt', help='Generate a sff.txt file' ) parser.add_option( '--flow', dest='flow', help='Generate a flowgram file' ) parser.add_option( '--trim', dest='trim', help='Whether sequences and quality scores are trimmed to the clipQualLeft and clipQualRight values' ) parser.add_option( '--input', dest='input', help='' ) parser.add_option( '--phylip', dest='phylip', help='' ) + parser.add_option( '--phylip2', dest='phylip2', help='' ) parser.add_option( '--column', dest='column', help='' ) + parser.add_option( '--sort', dest='sort', help='specify sort order' ) parser.add_option( '--sorted', dest='sorted', help='Input is presorted' ) parser.add_option( '--showabund', dest='showabund', help='' ) parser.add_option( '--short', dest='short', help='Keep sequences that are too short to chop' ) @@ -387,6 +547,7 @@ parser.add_option( '--numotu', dest='numotu', help='' ) parser.add_option( '--fontsize', dest='fontsize', help='' ) parser.add_option( '--neqs', dest='neqs', help='' ) + parser.add_option( '--random', dest='random', help='' ) parser.add_option( '--permute', dest='permute', help='' ) parser.add_option( '--rarefy', dest='rarefy', help='' ) parser.add_option( '--collect', dest='collect', help='' ) @@ -408,6 +569,8 @@ parser.add_option( '--sets', dest='sets', help='' ) parser.add_option( '--metric', dest='metric', help='' ) parser.add_option( '--epsilon', dest='epsilon', help='' ) + parser.add_option( '--alpha', dest='alpha', help='' ) + parser.add_option( '--root', dest='root', help='' ) parser.add_option( '--axes', dest='axes', help='table of name column followed by columns of axis values' ) parser.add_option( '--numaxes', dest='numaxes', help='the number of axes' ) parser.add_option( '--metadata', dest='metadata', help='data table with columns of floating-point values' ) @@ -446,7 +609,14 @@ os.makedirs(options.tmpdir) tmp_dir = options.tmpdir else: - tmp_dir = tempfile.mkdtemp() + if options.outputdir != None: + if not os.path.isdir(options.outputdir): + os.makedirs(options.outputdir) + tmp_dir = os.path.join(options.outputdir,'tmp') + if not os.path.isdir(tmp_dir): + os.makedirs(tmp_dir) + else: + tmp_dir = tempfile.mkdtemp() if options.inputdir != None: if not os.path.isdir(options.inputdir): os.makedirs(options.inputdir) @@ -478,11 +648,12 @@ # print >> sys.stderr, cmd_opts # print >> sys.stderr, params # so will appear as blurb for file params.append('%s(%s)' % (options.cmd,cmd_opts)) + if debug: params.append('get.current()') try: # Generate the mothur commandline # http://www.mothur.org/wiki/Command_line_mode cmdline = 'mothur "#' + '; '.join(params) + '"' - # print >> sys.stdout, '%s' % cmdline + if debug: print >> sys.stdout, '%s' % cmdline if tmp_dir == None or not os.path.isdir(tmp_dir): tmp_dir = tempfile.mkdtemp() tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix='.err' ).name @@ -492,6 +663,7 @@ proc = subprocess.Popen( args=cmdline, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() ) # proc = subprocess.Popen( args=cmdline, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE ) returncode = proc.wait() + if debug: print >> sys.stdout, 'returncode %d' % returncode tmp_stderr.close() # get stderr, allowing for case where it's very large tmp_stderr = open( tmp_stderr_name, 'rb' ) @@ -505,12 +677,26 @@ except OverflowError: pass tmp_stderr.close() + tmp_stdout.close() + if debug: print >> sys.stdout, 'parse %s' % tmp_stdout_name if returncode != 0: + try: + # try to copy stdout to the logfile + for output in options.result.split(','): + # Each item has a regex pattern and a file path to a galaxy dataset + (pattern,path) = output.split(':') + if debug: print >> sys.stdout, '%s -> %s' % (pattern,path) + if pattern.find('\.logfile') > 0: + if path != None and os.path.exists(path): + logfile_to_html(tmp_stdout_name,path,inputdir,outputdir,title="Mothur %s Error Logfile" % options.cmd) + break + except: + pass raise Exception, stderr stdout = '' # Parse stdout to provide info - tmp_stdout.close() tmp_stdout = open( tmp_stdout_name, 'rb' ) + # try to find a "little" something interesting to print as info for the galaxy interface info = '' if options.cmd.startswith('chimera') and not options.cmd.endswith('check'): pattern = '^.*$' @@ -533,19 +719,32 @@ info += "Chimeras: %d" % chimera_count else: found_begin = False + info_chars = 0 for line in tmp_stdout: if line.find(outputdir) >= 0: continue + if line.startswith('**************'): + continue if re.match('^Processing.*',line): continue + if re.match('^Reading .*',line): + continue + if re.match('^Merging .*',line): + continue + if re.match('^DONE.*',line): + continue if re.match('.*\.\.\.\s*$',line): continue if re.match('^\d*\s*$',line): continue + # if re.match('^(unique|[0-9.]*)(\t\d+)+',line): # abundance from cluster commands + if not options.cmd.startswith('unifrac') and re.match('^([0-9.]+)(\t\d+)*',line): # abundance from cluster commands, allow unique line into info + continue if re.match('Output .*',line): break - if found_begin: + if found_begin and info_chars < 200: info += "%s" % line + info_chars += len(line) if re.match('mothur > ' + options.cmd + '\(.*\)', line): found_begin = True tmp_stdout.close() @@ -553,6 +752,15 @@ # Collect output files flist = os.listdir(outputdir) if debug: print >> sys.stdout, '%s' % flist + # chimera.check can generate svg files, but they are not listed in the mothur.*.logfile, so we'll added them in here + if options.cmd == 'chimera.check': + svgs = [] + mothurlog = None + for fname in flist: + if fname.endswith('.svg'): + svgs.append(fname) + elif fname.endswith('.logfile'): + mothurlog = fname # process option result first # These are the known galaxy datasets listed in the --result= param if len(flist) > 0 and options.result: @@ -573,23 +781,31 @@ if fname.endswith('.logfile'): # Make the logfile into html logfile_to_html(fpath,path,inputdir,outputdir,title="Mothur %s Logfile" % options.cmd) - elif False and outputdir == options.outputdir: - # Use a hard link if outputdir is the extra_files_path + elif outputdir == options.outputdir: + # Use a hard link if outputdir is the extra_files_path, allows link from mothur logfile without copying data. try: + if debug: print >> sys.stdout, 'link %s %s' % (fpath, path) os.link(fpath, path) except: + if debug: print >> sys.stdout, 'copy %s %s' % (fpath, path) shutil.copy2(fpath, path) else: + if debug: print >> sys.stdout, 'copy2 %s %s' % (fpath, path) shutil.copy2(fpath, path) break + # mothur.*.logfile may be in tmp_dir # chimera.pintail e.g. generates files in the working dir that we might want to save if not found: for fname in os.listdir(tmp_dir): if debug: print >> sys.stdout, 'tmpdir %s match: %s' % (fname,re.match(pattern,fname)) if re.match(pattern,fname): fpath = os.path.join(tmp_dir,fname) - shutil.copy2(fpath, path) - break + if fname.endswith('.logfile'): + # Make the logfile into html + logfile_to_html(fpath,path,inputdir,outputdir,title="Mothur %s Logfile" % options.cmd) + else: + shutil.copy2(fpath, path) + break # Handle the dynamically generated galaxy datasets # http://bitbucket.org/galaxy/galaxy-central/wiki/ToolsMultipleOutput # --new_datasets= specifies files to copy to the new_file_path @@ -605,7 +821,8 @@ if m: fpath = os.path.join(outputdir,fname) if len(m.groups()) > 0: - root = m.groups()[0] + # remove underscores since galaxy uses that as a field separator for dynamic datasets + root = m.groups()[0].replace('_','') else: # remove the ext from the name if it exists, galaxy will add back later # remove underscores since galaxy uses that as a field separator for dynamic datasets @@ -632,13 +849,18 @@ try: if outputdir != options.outputdir and os.path.exists(outputdir): if os.path.islink(outputdir): + if debug: print >> sys.stdout, 'rm outputdir %s' % outputdir os.remove(outputdir) + if debug: print >> sys.stdout, 'rmtree outputdir %s' % outputdir shutil.rmtree(os.path.dirname(outputdir)) else: + if debug: print >> sys.stdout, 'rmtree %s' % outputdir shutil.rmtree(outputdir) if inputdir != options.inputdir and os.path.exists(inputdir): + if debug: print >> sys.stdout, 'rmtree %s' % inputdir shutil.rmtree(inputdir) except: + if debug: print >> sys.stdout, 'rmtree failed' pass