Mercurial > repos > jjohnson > mothur_toolsuite
diff mothur/tools/mothur/mothur_wrapper.py @ 27:49058b1f8d3f
Update to mothur version 1.27 and add tool_dependencies.xml to automatically install mothur
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 04 Dec 2012 11:05:19 -0600 |
parents | 5c77423823cb |
children | 7238483c96fa |
line wrap: on
line diff
--- a/mothur/tools/mothur/mothur_wrapper.py	Wed May 16 13:12:05 2012 -0500
+++ b/mothur/tools/mothur/mothur_wrapper.py	Tue Dec 04 11:05:19 2012 -0600
@@ -4,7 +4,7 @@
 http://www.mothur.org/
 Supports mothur version
-mothur v.1.24.0
+mothur v.1.27.0
 Class encapsulating Mothur galaxy tool.
 Expect each invocation to include:
@@ -195,10 +195,10 @@
 cmd_dict['classify.tree'] = dict({'required' : ['taxonomy','tree'],'optional' : ['name','group','cutoff']})
 #clear.memory ## not needed in galaxy framework
 cmd_dict['clearcut'] = dict({'required' : [['phylip','fasta']],'optional' : ['seed','norandom','shuffle','neighbor','expblen','expdist','ntrees','matrixout','kimura','jukes','protein','DNA']})
- cmd_dict['cluster'] = dict({'required' : [['phylip','column']] , 'optional' : ['name','method','cutoff','hard','precision','sim','showabund','timing']})
- cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] , 'optional' : ['name','method','cutoff','hard','sim','precision']})
+ cmd_dict['cluster'] = dict({'required' : [['phylip','column']] , 'optional' : ['name','count','method','cutoff','hard','precision','sim','showabund','timing']})
+ cmd_dict['cluster.classic'] = dict({'required' : ['phylip'] , 'optional' : ['name','count','method','cutoff','hard','sim','precision']})
 cmd_dict['cluster.fragments'] = dict({'required' : ['fasta'] , 'optional' : ['name','diffs','percent']})
- cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] , 'optional' : ['name','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','timing','processors']})
+ cmd_dict['cluster.split'] = dict({'required' : [['fasta','phylip','column']] , 'optional' : ['name','count','method','splitmethod','taxonomy','taxlevel','showabund','cutoff','hard','large','precision','classic','timing','processors']})
 cmd_dict['collect.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','freq','groups','all']})
 cmd_dict['collect.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','size','label','freq']})
 cmd_dict['consensus.seqs'] = dict({'required' : ['fasta'], 'optional' : ['list','name','label','cutoff']})
@@ -206,10 +206,10 @@
 cmd_dict['cooccurrence'] = dict({'required' : ['shared'], 'optional' : ['iters','metric','matrixmodel','groups','label']})
 cmd_dict['corr.axes'] = dict({'required' : [['shared','relabund','metadata'],'axes'], 'optional' : ['label','groups','method','numaxes']})
- cmd_dict['count.groups'] = dict({'required' : ['group','shared'], 'optional' : ['accnos','groups']})
- cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups']})
+ cmd_dict['count.groups'] = dict({'required' : [['group','shared','count']], 'optional' : ['accnos','groups']})
+ cmd_dict['count.seqs'] = dict({'required' : ['name'], 'optional' : ['group','groups','large']})
- cmd_dict['create.database'] = dict({'required' : ['list, repfasta','repname','contaxonomy'], 'optional' : ['group','label']})
+ cmd_dict['create.database'] = dict({'required' : [['list','shared'],'repfasta','repname','contaxonomy'], 'optional' : ['group','label']})
 cmd_dict['degap.seqs'] = dict({'required' : ['fasta']})
 cmd_dict['deunique.seqs'] = dict({'required' : ['fasta','name'], 'optional' : []})
@@ -238,6 +238,7 @@
 cmd_dict['list.seqs'] = dict({'required' : [['fasta','name','group','list','alignreport','taxonomy']]})
 cmd_dict['make.biom'] = dict({'required' : ['shared'] , 'optional' : ['contaxonomy','matrixtype','groups','label']})
+ cmd_dict['make.contigs'] = dict({'required' : ['ffastq','rfastq',], 'optional' : ['align','match','mismatch','gapopen','gapextend','threshold','oligos','bdiffs','pdiffs','tdiffs','processors']})
 cmd_dict['make.fastq'] = dict({'required' : ['fasta','qfile'] , 'optional' : []})
 cmd_dict['make.group'] = dict({'required' : ['fasta','groups'], 'optional' : []})
@@ -248,7 +249,7 @@
 cmd_dict['metastats'] = dict({'required' : ['shared','design'], 'optional' : ['groups', 'label','iters','threshold','sets','processors']})
 cmd_dict['nmds'] = dict({'required' : ['phylip'], 'optional' : ['axes','mindim','maxdim','iters','maxiters','epsilon']})
 cmd_dict['normalize.shared'] = dict({'required' : [['shared','relabund']], 'optional' : ['label','method','norm','groups','makerelabund']})
- cmd_dict['otu.association'] = dict({'required' : [['shared','relabund']], 'optional' : ['groups', 'label','method']})
+ cmd_dict['otu.association'] = dict({'required' : [['shared','relabund']], 'optional' : ['groups', 'label','method','metadata']})
 cmd_dict['otu.hierarchy'] = dict({'required' : ['list','label'], 'optional' : ['output']})
 cmd_dict['pairwise.seqs'] = dict({'required' : ['fasta'], 'optional' : ['align','calc','countends','output','cutoff','match','mismatch','gapopen','gapextend','processors']})
 cmd_dict['parse.list'] = dict({'required' : ['list','group'], 'optional' : ['label']})
@@ -261,7 +262,7 @@
 cmd_dict['phylo.diversity'] = dict({'required' : ['tree'],'optional' : ['group','name','groups','iters','freq','scale','rarefy','collect','summary','processors']})
 cmd_dict['phylotype'] = dict({'required' : ['taxonomy'],'optional' : ['name','cutoff','label']})
 cmd_dict['pre.cluster'] = dict({'required' : ['fasta'], 'optional' : ['name','diffs','group','processors']})
- cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble']})
+ cmd_dict['rarefaction.shared'] = dict({'required' : ['shared'], 'optional' : ['calc','label','iters','groups','jumble','design','sets','groupmode']})
 cmd_dict['rarefaction.single'] = dict({'required' : [['list', 'sabund', 'rabund', 'shared']], 'optional' : ['calc','abund','iters','label','freq','processors']})
 cmd_dict['remove.groups'] = dict({'required' : ['group'], 'optional' : ['groups','accnos','fasta','name','list','shared','taxonomy','design']})
 cmd_dict['remove.lineage'] = dict({'required' : ['taxonomy','taxon'],'optional' : ['fasta','name','group','list','alignreport','dups']})
@@ -273,7 +274,7 @@
 cmd_dict['sens.spec'] = dict({'required' : ['list',['column','phylip']] , 'optional' : ['label','cutoff','hard','precision']})
 cmd_dict['seq.error'] = dict({'required' : ['fasta','reference'] , 'optional' : ['name','qfile','report','ignorechimeras','threshold','processors']})
 cmd_dict['sffinfo'] = dict({'required' : [['sff','sfftxt']], 'optional' : ['fasta','qfile','trim','sfftxt','flow','accnos']})
- cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','processors']})
+ cmd_dict['shhh.flows'] = dict({'required' : [['flow','files']], 'optional' : ['lookup','maxiter','mindelta','cutoff','sigma','order','large','processors']})
 cmd_dict['shhh.seqs'] = dict({'required' : [['fasta','files']], 'optional' : ['group','sigma','processors']})
 cmd_dict['split.abund'] = dict({'required' : ['fasta',['name','list']], 'optional' : ['cutoff','group','groups','label','accnos']})
 cmd_dict['split.groups'] = dict({'required' : ['fasta','group'], 'optional' : ['name','groups']})
@@ -331,6 +332,8 @@
 parser.add_option( '--numbases', dest='numbases', type="int", help='Number of base to allow' )
 parser.add_option( '--fasta', dest='fasta', help='fasta file paths' )
 parser.add_option( '--fastq', dest='fastq', help='fastq file paths' )
+ parser.add_option( '--ffastq', dest='ffastq', help='forward fastq file' )
+ parser.add_option( '--rfastq', dest='rfastq', help='reverse fastq file' )
 parser.add_option( '--qfile', dest='qfile', help='Sequence read quality file (454 platform)' )
 parser.add_option( '--repfasta', dest='repfasta', help='fasta file paths' )
 parser.add_option( '--qaverage', dest='qaverage', type="int", help='Remove sequences that have an average quality below the value' )
@@ -401,6 +404,7 @@
 parser.add_option( '--soft', dest='soft', type='int', help='Soft Mask - percentage required to retain column. (0-100)' )
 parser.add_option( '--hard', dest='hard', help='Hard Column Filter - A file should only contain one line consisting of 0 and 1 chars' )
 parser.add_option( '--calc', dest='calc', help='Calc Method - Gap Penality' )
+ parser.add_option( '--count', dest='count', help='Count file' )
 parser.add_option( '--countends', dest='countends', help='Penalize terminal gaps' )
 parser.add_option( '--cutoff', dest='cutoff', help='Distance Cutoff threshold, discard larger distances' )
 parser.add_option( '--countgaps', dest='countgaps', help='count gaps as bases' )
@@ -515,6 +519,7 @@
 parser.add_option( '--matrixtype', dest='matrixtype', help='' )
 parser.add_option( '--consensus', dest='consensus', help='boolean' )
 parser.add_option( '--biom', dest='biom', help='biom file' )
+ parser.add_option( '--classic', dest='classic', help='boolean' )
 # include read.otu options
 parser.add_option( '--rabund', dest='rabund', help='' )
 parser.add_option( '--sabund', dest='sabund', help='' )
@@ -653,10 +658,21 @@
 if re.match(pattern,line):
 chimera_count += 1
 info += "Chimeras: %d" % chimera_count
+ elif options.cmd == 'count.groups':
+ fh = open(os.path.join(outputdir,'tmp.groups.count'),'w')
+ for line in tmp_stdout:
+ m = re.match('(.+) contains (\d+)\.',line)
+ if m and len(m.groups()) == 2:
+ info += line
+ print >> fh, "%s\t%s\n" % (m.group(1),m.group(2))
+ fh.close()
 else:
 found_begin = False
 info_chars = 0
 for line in tmp_stdout:
+ if re.match('mothur > ' + options.cmd + '\(.*\)', line):
+ found_begin = True
+ continue
 if line.find(outputdir) >= 0:
 continue
 if line.startswith('**************'):
@@ -671,18 +687,19 @@
 continue
 if re.match('.*\.\.\.\s*$',line):
 continue
- if re.match('^\d*\s*$',line):
+ if re.match('^\d*\s*$',line) and not line.find(' contains '):
 continue
 # if re.match('^(unique|[0-9.]*)(\t\d+)+',line): # abundance from cluster commands
- if not options.cmd.startswith('unifrac') and re.match('^(unique|[0-9.]+)(\t\d+)*',line): # abundance from cluster commands, allow unique line into info
+ if (not (options.cmd.startswith('unifrac') or options.cmd.startswith('count.groups'))
+ and re.match('^(unique|[0-9.]+)(\t\d+)*',line)): # abundance from cluster commands, allow unique line into info
 continue
 if re.match('Output .*',line):
 break
+ if re.match('mothur > quit()',line):
+ break
 if found_begin and info_chars < 200:
 info += "%s" % line
 info_chars += len(line)
- if re.match('mothur > ' + options.cmd + '\(.*\)', line):
- found_begin = True
 tmp_stdout.close()
 print >> sys.stdout, info
 # Collect output files