Mercurial > repos > jjohnson > mothur_toolsuite
changeset 17:57df76d861e4
Modifications for ToolShed proprietary data types
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 17 Jan 2012 11:08:15 -0600 |
parents | 541e3c97c240 |
children | 697156806162 |
files | mothur/lib/galaxy/datatypes/converters/ref_to_seq_taxonomy_converter.py mothur/lib/galaxy/datatypes/converters/ref_to_seq_taxonomy_converter.xml mothur/lib/galaxy/datatypes/metagenomics.py mothur/tool-data/datatypes.conf.xml |
diffstat | 4 files changed, 217 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""
Convert a ref.taxonomy file to a seq.taxonomy file.

Each data line of the input is expected to hold at least two tab-separated
fields: a sequence name and a taxonomy string.  The output keeps only those
two fields and guarantees that the taxonomy ends with a ';'.  Blank lines and
lines starting with '#' are dropped.

Usage:
%python ref_to_seq_taxonomy_converter.py <ref.taxonomy_filename> <seq.taxonomy_filename>
"""

import sys, os, re
from math import *

assert sys.version_info[:2] >= ( 2, 4 )

USAGE = "Usage: ref_to_seq_taxonomy_converter.py <ref.taxonomy_filename> <seq.taxonomy_filename>\n"


def stop_err( msg ):
    """Write msg to stderr and terminate with a non-zero exit status."""
    sys.stderr.write( "%s" % msg )
    # A non-zero status lets callers that inspect the exit code see the failure.
    sys.exit( 1 )


def ref_to_seq_line( line, lineno=None ):
    """
    Convert one line of a ref.taxonomy file.

    line   -- the raw input line (a trailing newline/whitespace is ignored)
    lineno -- optional 1-based line number, used only in error messages

    Returns the converted 'name<TAB>taxonomy;' line terminated by a newline,
    or None when the line is blank or a '#' comment and must be skipped.
    Raises ValueError when the line has fewer than two tab-separated fields.
    """
    text = line.rstrip()  # eliminate trailing space and new line characters
    if not text or text.startswith( '#' ):
        return None
    fields = text.split( '\t' )
    if len( fields ) < 2:
        where = ''
        if lineno is not None:
            where = 'line %d: ' % lineno
        raise ValueError( "%sexpected <name><TAB><taxonomy>, found %d field(s)" % ( where, len( fields ) ) )
    taxonomy = fields[1]
    # make sure the 2nd field (taxonomy) ends with exactly the ';' it already
    # had, or gains one if it had none
    if not taxonomy.endswith( ';' ):
        taxonomy += ';'
    return '%s\t%s\n' % ( fields[0], taxonomy )


def convert( lines, out ):
    """
    Convert every line from the iterable `lines`, writing results to `out`.

    `out` only needs a write() method.  Raises ValueError (with the offending
    line number in the message) on the first malformed line.
    """
    for index, line in enumerate( lines ):
        converted = ref_to_seq_line( line, index + 1 )
        if converted is not None:
            out.write( converted )


def __main__():
    """Command-line entry point: convert sys.argv[1] into sys.argv[2]."""
    if len( sys.argv ) < 3:
        stop_err( USAGE )
    infile_name = sys.argv[1]
    outfile_name = sys.argv[2]
    # Open the input first so a missing input does not leave a truncated output.
    infile = open( infile_name )
    try:
        outfile = open( outfile_name, 'w' )
        try:
            try:
                convert( infile, outfile )
            except ValueError:
                # sys.exc_info() keeps this source valid on old and new Pythons
                stop_err( "%s: %s\n" % ( infile_name, sys.exc_info()[1] ) )
        finally:
            outfile.close()
    finally:
        infile.close()


if __name__ == "__main__": __main__()
<!--
  Galaxy datatype converter: ref.taxonomy -> seq.taxonomy.
  Runs ref_to_seq_taxonomy_converter.py on the selected dataset.
  NOTE: the tool id keeps its original spelling ("taxomony") because it is an
  identifier; renaming it could break anything that already refers to it.
-->
<tool id="CONVERTER_ref_to_seq_taxomony" name="Convert Ref taxonomy to Seq Taxonomy" version="1.0.0">
  <description>converts 2 or 3 column sequence taxonomy file to a 2 column mothur taxonomy_outline format</description>
  <command interpreter="python">ref_to_seq_taxonomy_converter.py $input $output</command>
  <inputs>
    <param name="input" type="data" format="ref.taxonomy" label="a Sequence Taxonomy file"/>
  </inputs>
  <outputs>
    <data name="output" format="seq.taxonomy"/>
  </outputs>
  <help>
Keeps the first two tab-separated columns (sequence name and taxonomy) of each
line and makes sure the taxonomy column ends with a semicolon.  Blank lines and
lines starting with '#' are not copied to the output.
  </help>
</tool>
--- a/mothur/lib/galaxy/datatypes/metagenomics.py Fri Dec 09 12:12:16 2011 -0600 +++ b/mothur/lib/galaxy/datatypes/metagenomics.py Tue Jan 17 11:08:15 2012 -0600 @@ -4,18 +4,19 @@ for Mothur """ -import data -import logging, os, sys, time, tempfile, shutil, string, glob, re +import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re import galaxy.model +from galaxy.datatypes import data +from galaxy.datatypes.sniff import * from galaxy.datatypes import metadata from galaxy.datatypes import tabular from galaxy.datatypes import sequence from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.data import Text from galaxy.datatypes.tabular import Tabular from galaxy.datatypes.sequence import Fasta from galaxy import util from galaxy.datatypes.images import Html -from sniff import * log = logging.getLogger(__name__) @@ -342,7 +343,7 @@ Tabular.__init__( self, **kwd ) self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] -class DistanceMatrix(data.Text): +class DistanceMatrix( Text ): file_ext = 'dist' """Add metadata elements""" MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) @@ -403,7 +404,7 @@ """Initialize secondary structure map datatype""" Tabular.__init__( self, **kwd ) def init_meta( self, dataset, copy_from=None ): - data.Text.init_meta( self, dataset, copy_from=copy_from ) + Text.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): dataset.metadata.sequences = 0 @@ -543,7 +544,7 @@ self.column_names = ['name'] self.columns = 1 -class Oligos( data.Text ): +class Oligos( Text ): file_ext = 'oligos' def sniff( self, filename ): @@ -699,7 +700,7 @@ self.masked = True self.filtered = True -class LaneMask(data.Text): +class LaneMask(Text): file_ext = 'filter' def sniff( self, filename ): @@ -872,7 +873,7 @@ Tabular.__init__( self, **kwd ) self.column_names = 
# --- partial diff hunks from the changeset view, kept verbatim (definitions not fully visible) ---
# ['taxlevel','rankID','taxon','daughterlevels','total']
# -class Phylip(data.Text):
# +class Phylip(Text):
#      file_ext = 'phy'
#      def sniff( self, filename ):
# @@ -1033,6 +1034,124 @@
#              out = "Can't create peek %s" % str( exc )
#          return out

class Newick( Text ):
    """
    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
    http://evolution.genetics.washington.edu/phylip/newicktree.html
    http://en.wikipedia.org/wiki/Newick_format
    Example:
    (B,(A,C,E),D);
    or example with branch lengths:
    (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);
    or an example with embedded comments but no branch lengths:
    ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]);
    Example with named interior node:
    (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0);
    """
    file_ext = 'tre'

    def __init__(self, **kwd):
        """Initialize the Newick datatype by delegating to Text."""
        Text.__init__( self, **kwd )

    def sniff( self, filename ):   ## TODO
        """
        Determine whether the file is in Newick format
        Note: Last non-space char of a tree should be a semicolon: ';'
        Usually the first char will be a open parenthesis: '('
        (,,(,));                               no nodes are named
        (A,B,(C,D));                           leaf nodes are named
        (A,B,(C,D)E)F;                         all nodes are named
        (:0.1,:0.2,(:0.3,:0.4):0.5);           all but root node have a distance to parent
        (:0.1,:0.2,(:0.3,:0.4):0.5):0.0;       all have a distance to parent
        (A:0.1,B:0.2,(C:0.3,D:0.4):0.5);       distances and leaf names (popular)
        (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;     distances and all names
        ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;    a tree rooted on a leaf node (rare)

        Returns True when the first non-space character is '(' and the last
        non-space character is ';' (only the first and last 2000 bytes are
        inspected); returns False otherwise, including when the file is
        missing or cannot be read.
        """
        if not os.path.exists(filename):
            return False
        fh = None
        try:
            ## For now, guess this is a Newick file if it starts with a '(' and ends with a ';'
            flen = os.path.getsize(filename)
            window = min(flen, 2000)
            # Binary mode: absolute seeks on raw bytes behave the same on every
            # platform/Python; latin-1 maps each byte to one character so
            # decoding can never fail on arbitrary uploads.
            fh = open( filename, 'rb' )
            # check end of the file for a semicolon
            fh.seek(flen - window)
            tail = fh.read(window).decode('latin-1').strip()
            if not tail.endswith(';'):
                return False
            # See if this starts with a open parenthesis
            if window < flen:
                fh.seek(0)
                head = fh.read(window).decode('latin-1').strip()
            else:
                # the tail window already covers the whole file
                head = tail
            return head.startswith('(')
        except (IOError, OSError):
            # unreadable file: not something we can recognise as Newick
            return False
        finally:
            # fh stays None when open() itself failed
            if fh is not None:
                fh.close()

class Nhx( Newick ):
    """
    New Hampshire eXtended Newick with embedded [&&NHX:...] annotation tags
    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
    http://evolution.genetics.washington.edu/phylip/newicktree.html
    http://en.wikipedia.org/wiki/Newick_format
    Example:
    (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus]));
    """
    file_ext = 'nhx'

class Nexus( Text ):
    """
    http://en.wikipedia.org/wiki/Nexus_file
    Example:
    #NEXUS
    BEGIN TAXA;
          Dimensions NTax=4;
          TaxLabels fish frog snake mouse;
    END;

    BEGIN CHARACTERS;
          Dimensions NChar=20;
          Format DataType=DNA;
          Matrix
            fish   ACATA GAGGG TACCT CTAAG
            frog   ACATA GAGGG TACCT CTAAG
            snake  ACATA GAGGG TACCT CTAAG
            mouse  ACATA GAGGG TACCT CTAAG
    END;

    BEGIN TREES;
          Tree best=(fish, (frog, (snake, mouse)));
    END;
    """
    file_ext = 'nex'

    def __init__(self, **kwd):
        """Initialize the Nexus datatype by delegating to Text."""
        Text.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is in nexus format
        First line should be:
        #NEXUS

        Returns True only when the first line, stripped of surrounding
        whitespace, is exactly '#NEXUS'; returns False otherwise, including
        when the file cannot be opened or read.
        """
        fh = None
        try:
            # Binary read + latin-1 decode never fails on non-text uploads.
            fh = open( filename, 'rb' )
            first_line = fh.readline().decode('latin-1').strip()
            return first_line == '#NEXUS'
        except (IOError, OSError):
            return False
        finally:
            # fh stays None when open() itself failed
            if fh is not None:
                fh.close()

## Qiime Classes

# --- partial diff hunk from the changeset view, kept verbatim (definition not fully visible) ---
# @@ -1165,7 +1284,7 @@
#      """
#      file_ext = 'qiimeparams'
# -class QiimePrefs(data.Text):
# +class QiimePrefs(Text):
#      """
#      A text file, containing coloring preferences to be used by make_distance_histograms.py, make_2d_plots.py and make_3d_plots.py.
#      Example:
<?xml version="1.0"?>
<!--
  Datatype registry for the mothur tool suite (mothur/tool-data/datatypes.conf.xml).
  Each <datatype> maps a file extension to a class in the metagenomics module.
-->
<datatypes>
    <!-- Python module that supplies the datatype classes registered below. -->
    <datatype_files>
        <datatype_file name="metagenomics.py"/>
    </datatype_files>
    <!-- NOTE(review): display_in_upload presumably lists the type in the upload form; confirm against Galaxy docs. -->
    <registration>
        <!-- OTU / abundance tables -->
        <datatype extension="otu" type="galaxy.datatypes.metagenomics:Otu" display_in_upload="true"/>
        <datatype extension="list" type="galaxy.datatypes.metagenomics:OtuList" display_in_upload="true"/>
        <datatype extension="sabund" type="galaxy.datatypes.metagenomics:Sabund" display_in_upload="true"/>
        <datatype extension="rabund" type="galaxy.datatypes.metagenomics:Rabund" display_in_upload="true"/>
        <datatype extension="shared" type="galaxy.datatypes.metagenomics:SharedRabund" display_in_upload="true"/>
        <datatype extension="relabund" type="galaxy.datatypes.metagenomics:RelAbund" display_in_upload="true"/>
        <datatype extension="names" type="galaxy.datatypes.metagenomics:Names" display_in_upload="true"/>
        <datatype extension="design" type="galaxy.datatypes.metagenomics:Design" display_in_upload="true"/>
        <datatype extension="summary" type="galaxy.datatypes.metagenomics:Summary" display_in_upload="true"/>
        <datatype extension="groups" type="galaxy.datatypes.metagenomics:Group" display_in_upload="true"/>
        <datatype extension="oligos" type="galaxy.datatypes.metagenomics:Oligos" display_in_upload="true"/>
        <!-- Alignments and related reports -->
        <datatype extension="align" type="galaxy.datatypes.metagenomics:SequenceAlignment" display_in_upload="true"/>
        <datatype extension="accnos" type="galaxy.datatypes.metagenomics:AccNos" display_in_upload="true"/>
        <datatype extension="map" type="galaxy.datatypes.metagenomics:SecondaryStructureMap" display_in_upload="true"/>
        <datatype extension="align.check" type="galaxy.datatypes.metagenomics:AlignCheck" display_in_upload="true"/>
        <datatype extension="align.report" type="galaxy.datatypes.metagenomics:AlignReport" display_in_upload="true"/>
        <datatype extension="filter" type="galaxy.datatypes.metagenomics:LaneMask" display_in_upload="true"/>
        <!-- Distance matrices -->
        <datatype extension="dist" type="galaxy.datatypes.metagenomics:DistanceMatrix" display_in_upload="true"/>
        <datatype extension="pair.dist" type="galaxy.datatypes.metagenomics:PairwiseDistanceMatrix" display_in_upload="true"/>
        <datatype extension="square.dist" type="galaxy.datatypes.metagenomics:SquareDistanceMatrix" display_in_upload="true"/>
        <datatype extension="lower.dist" type="galaxy.datatypes.metagenomics:LowerTriangleDistanceMatrix" display_in_upload="true"/>
        <!-- Taxonomy files; ref.taxonomy can be converted to seq.taxonomy by the listed converter tool -->
        <datatype extension="ref.taxonomy" type="galaxy.datatypes.metagenomics:RefTaxonomy" display_in_upload="true">
            <converter file="ref_to_seq_taxonomy_converter.xml" target_datatype="seq.taxonomy"/>
        </datatype>
        <datatype extension="seq.taxonomy" type="galaxy.datatypes.metagenomics:SequenceTaxonomy" display_in_upload="true"/>
        <datatype extension="rdp.taxonomy" type="galaxy.datatypes.metagenomics:RDPSequenceTaxonomy" display_in_upload="true"/>
        <datatype extension="cons.taxonomy" type="galaxy.datatypes.metagenomics:ConsensusTaxonomy" display_in_upload="true"/>
        <datatype extension="tax.summary" type="galaxy.datatypes.metagenomics:TaxonomySummary" display_in_upload="true"/>
        <!-- Frequency / quantile tables -->
        <datatype extension="freq" type="galaxy.datatypes.metagenomics:Frequency" display_in_upload="true"/>
        <datatype extension="quan" type="galaxy.datatypes.metagenomics:Quantile" display_in_upload="true"/>
        <datatype extension="filtered.quan" type="galaxy.datatypes.metagenomics:FilteredQuantile" display_in_upload="true"/>
        <datatype extension="masked.quan" type="galaxy.datatypes.metagenomics:MaskedQuantile" display_in_upload="true"/>
        <datatype extension="filtered.masked.quan" type="galaxy.datatypes.metagenomics:FilteredMaskedQuantile" display_in_upload="true"/>
        <datatype extension="axes" type="galaxy.datatypes.metagenomics:Axes" display_in_upload="true"/>
        <datatype extension="sff.flow" type="galaxy.datatypes.metagenomics:SffFlow" display_in_upload="true"/>
        <!-- Tree formats added in this changeset -->
        <datatype extension="tre" type="galaxy.datatypes.metagenomics:Newick" display_in_upload="true"/>
        <datatype extension="nhx" type="galaxy.datatypes.metagenomics:Nhx" display_in_upload="true"/>
        <datatype extension="nex" type="galaxy.datatypes.metagenomics:Nexus" display_in_upload="true"/>
    </registration>
</datatypes>