#!/usr/bin/env python
"""
Convert a ref.taxonomy file to a seq.taxonomy file.

The input is read line by line as tab-separated text.  For every data line
the first column and the second column (the taxonomy) are written to the
output, separated by a tab; a ';' is appended to the taxonomy when it does
not already end with one.  Any further columns are dropped.  Blank lines and
lines starting with '#' are skipped.

Usage:
%python ref_to_seq_taxonomy_converter.py INPUT_FILE OUTPUT_FILE
"""

import sys, os, re
from math import *

assert sys.version_info[:2] >= ( 2, 4 )


def stop_err( msg ):
    """Write msg to stderr and terminate the script with a failure status."""
    sys.stderr.write( "%s" % msg )
    sys.exit( 1 )


def convert_line( line ):
    """
    Convert a single input line to its seq.taxonomy form.

    Returns the newline-terminated output line, or None when the line is
    blank or a '#' comment and must be skipped.
    Raises ValueError when a data line has no second (taxonomy) column.
    """
    line = line.rstrip()  # eliminate trailing space and new line characters
    if not line or line.startswith( '#' ):
        return None
    fields = line.split( '\t' )
    if len( fields ) < 2:
        raise ValueError( "expected at least 2 tab-separated columns, found %d" % len( fields ) )
    taxonomy = fields[1]
    # make sure the 2nd field (taxonomy) ends with a ;
    if not taxonomy.endswith( ';' ):
        taxonomy += ';'
    return '%s\t%s\n' % ( fields[0], taxonomy )


def convert( infile_name, outfile_name ):
    """
    Convert the file infile_name and write the result to outfile_name.

    Both files are closed even when conversion fails.  A malformed data line
    ends the script through stop_err with the offending line number.
    """
    # try/finally rather than 'with' keeps the script valid on the Python 2.4
    # floor asserted above.
    outfile = open( outfile_name, 'w' )
    try:
        infile = open( infile_name )
        try:
            for index, line in enumerate( infile ):
                try:
                    converted = convert_line( line )
                except ValueError:
                    stop_err( "%s: line %d has no taxonomy column (tab-separated fields expected)\n"
                              % ( infile_name, index + 1 ) )
                if converted is not None:
                    outfile.write( converted )
        finally:
            infile.close()
    finally:
        outfile.close()


def __main__():
    """Script entry point: sys.argv[1] is the input file, sys.argv[2] the output file."""
    if len( sys.argv ) < 3:
        stop_err( "Usage: python ref_to_seq_taxonomy_converter.py INPUT_FILE OUTPUT_FILE\n" )
    convert( sys.argv[1], sys.argv[2] )


if __name__ == "__main__": __main__()
b/mothur/lib/galaxy/datatypes/metagenomics.py Tue Jan 17 11:08:15 2012 -0600 @@ -4,18 +4,19 @@ for Mothur """ -import data -import logging, os, sys, time, tempfile, shutil, string, glob, re +import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re import galaxy.model +from galaxy.datatypes import data +from galaxy.datatypes.sniff import * from galaxy.datatypes import metadata from galaxy.datatypes import tabular from galaxy.datatypes import sequence from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.data import Text from galaxy.datatypes.tabular import Tabular from galaxy.datatypes.sequence import Fasta from galaxy import util from galaxy.datatypes.images import Html -from sniff import * log = logging.getLogger(__name__) @@ -342,7 +343,7 @@ Tabular.__init__( self, **kwd ) self.column_names = ['name','pound','dash','plus','equal','loop','tilde','total'] -class DistanceMatrix(data.Text): +class DistanceMatrix( Text ): file_ext = 'dist' """Add metadata elements""" MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) @@ -403,7 +404,7 @@ """Initialize secondary structure map datatype""" Tabular.__init__( self, **kwd ) def init_meta( self, dataset, copy_from=None ): - data.Text.init_meta( self, dataset, copy_from=copy_from ) + Text.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): dataset.metadata.sequences = 0 @@ -543,7 +544,7 @@ self.column_names = ['name'] self.columns = 1 -class Oligos( data.Text ): +class Oligos( Text ): file_ext = 'oligos' def sniff( self, filename ): @@ -699,7 +700,7 @@ self.masked = True self.filtered = True -class LaneMask(data.Text): +class LaneMask(Text): file_ext = 'filter' def sniff( self, filename ): @@ -872,7 +873,7 @@ Tabular.__init__( self, **kwd ) self.column_names = ['taxlevel','rankID','taxon','daughterlevels','total'] -class Phylip(data.Text): +class 
class Newick( Text ):
    """
    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
    http://evolution.genetics.washington.edu/phylip/newicktree.html
    http://en.wikipedia.org/wiki/Newick_format
    Example:
    (B,(A,C,E),D);
    or example with branch lengths:
    (B:6.0,(A:5.0,C:3.0,E:4.0):5.0,D:11.0);
    or an example with embedded comments but no branch lengths:
    ((a [&&PRIME S=x], b [&&PRIME S=y]), c [&&PRIME S=z]);
    Example with named interior node:
    (B:6.0,(A:5.0,C:3.0,E:4.0)Ancestor1:5.0,D:11.0);
    """
    file_ext = 'tre'

    def __init__(self, **kwd):
        Text.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determine whether the file is in Newick format

        TODO: this is only a heuristic.  For now a file is guessed to be
        Newick when, ignoring surrounding whitespace, it starts with an open
        parenthesis '(' and ends with a semicolon ';'.  Only the first and
        last 2000 bytes of the file are examined.

        Returns True when both checks pass, False otherwise (including when
        the file does not exist or cannot be read).

        Note: Last non-space char of a tree should be a semicolon: ';'
        Usually the first char will be a open parenthesis: '('
        (,,(,));                               no nodes are named
        (A,B,(C,D));                           leaf nodes are named
        (A,B,(C,D)E)F;                         all nodes are named
        (:0.1,:0.2,(:0.3,:0.4):0.5);           all but root node have a distance to parent
        (:0.1,:0.2,(:0.3,:0.4):0.5):0.0;       all have a distance to parent
        (A:0.1,B:0.2,(C:0.3,D:0.4):0.5);       distances and leaf names (popular)
        (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;     distances and all names
        ((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;    a tree rooted on a leaf node (rare)
        """
        if not os.path.exists( filename ):
            return False
        try:
            file_size = os.path.getsize( filename )
            span = min( file_size, 2000 )
            # Binary mode: seeking relative to the end of the file is not
            # allowed on text-mode handles in Python 3.
            with open( filename, 'rb' ) as fh:
                # check end of the file for a semicolon
                fh.seek( -span, os.SEEK_END )
                tail = fh.read( span ).strip()
                if not tail.endswith( b';' ):
                    return False
                # See if this starts with a open parenthesis
                if span < file_size:
                    # The tail did not cover the start of the file; read the head.
                    fh.seek( 0 )
                    head = fh.read( span ).strip()
                else:
                    # The whole file was already read as the tail.
                    head = tail
            return head.startswith( b'(' )
        except ( IOError, OSError ):
            # Unreadable file: not something we can claim is Newick.
            return False

class Nhx( Newick ):
    """
    New Hampshire eXtended Newick with embedded [&&NHX:...] tags
    The Newick Standard for representing trees in computer-readable form makes use of the correspondence between trees and nested parentheses.
    http://evolution.genetics.washington.edu/phylip/newicktree.html
    http://en.wikipedia.org/wiki/Newick_format
    Example:
    (gene1_Hu[&&NHX:S=Hu_Homo_sapiens], (gene2_Hu[&&NHX:S=Hu_Homo_sapiens], gene2_Mu[&&NHX:S=Mu_Mus_musculus]));
    """
    file_ext = 'nhx'

class Nexus( Text ):
    """
    http://en.wikipedia.org/wiki/Nexus_file
    Example:
    #NEXUS
    BEGIN TAXA;
          Dimensions NTax=4;
          TaxLabels fish frog snake mouse;
    END;

    BEGIN CHARACTERS;
          Dimensions NChar=20;
          Format DataType=DNA;
          Matrix
            fish   ACATA GAGGG TACCT CTAAG
            frog   ACATA GAGGG TACCT CTAAG
            snake  ACATA GAGGG TACCT CTAAG
            mouse  ACATA GAGGG TACCT CTAAG
    END;

    BEGIN TREES;
          Tree best=(fish, (frog, (snake, mouse)));
    END;
    """
    file_ext = 'nex'

    def __init__(self, **kwd):
        Text.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Determines whether the file is in nexus format
        First line should be:
        #NEXUS

        Returns True when the first line, ignoring surrounding whitespace,
        is exactly '#NEXUS'; False otherwise (including when the file cannot
        be opened or read).
        """
        try:
            # Binary mode so that non-text content cannot raise a decoding
            # error while the first line is read.
            with open( filename, 'rb' ) as fh:
                return fh.readline().strip() == b'#NEXUS'
        except ( IOError, OSError ):
            return False