# HG changeset patch # User Jim Johnson # Date 1337189324 18000 # Node ID bfbaf823be4c125bc86d51cff07d855e99601905 # Parent 09740be2bc9c6e082c68e03c397f23a31995b307 Change metagenomics datatypes to include labels and groups metadata. change Mothur tool configs to get label and group select options from a data_meta filter rather than using the options from_dataset attribute. This grealty decreases memory demand for the galaxy server. diff -r 09740be2bc9c -r bfbaf823be4c mothur/lib/galaxy/datatypes/metagenomics.py --- a/mothur/lib/galaxy/datatypes/metagenomics.py Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/lib/galaxy/datatypes/metagenomics.py Wed May 16 12:28:44 2012 -0500 @@ -18,13 +18,42 @@ from galaxy import util from galaxy.datatypes.images import Html + log = logging.getLogger(__name__) ## Mothur Classes -class Otu( Tabular ): +class Otu( Text ): file_ext = 'otu' + MetadataElement( name="columns", default=0, desc="Number of columns", readonly=True, visible=True, no_value=0 ) + MetadataElement( name="labels", default=[], desc="Label Names", readonly=True, visible=True, no_value=[] ) + def __init__(self, **kwd): + Text.__init__( self, **kwd ) + def set_meta( self, dataset, overwrite = True, **kwd ): + if dataset.has_data(): + label_names = set() + ncols = 0 + data_lines = 0 + comment_lines = 0 + try: + fh = open( dataset.file_name ) + for line in fh: + fields = line.strip().split('\t') + if len(fields) >= 2: + data_lines += 1 + ncols = max(ncols,len(fields)) + label_names.add(fields[0]) + else: + comment_lines += 1 + # Set the discovered metadata values for the dataset + dataset.metadata.data_lines = data_lines + dataset.metadata.columns = ncols + dataset.metadata.labels = [] + dataset.metadata.labels += label_names + dataset.metadata.labels.sort() + finally: + fh.close() def sniff( self, filename ): """ @@ -63,10 +92,25 @@ class OtuList( Otu ): file_ext = 'list' + def __init__(self, **kwd): + Otu.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, **kwd ): + Otu.set_meta(self,dataset, overwrite = True, **kwd ) + """ + # too many columns to be stored in metadata + if dataset != None and dataset.metadata.columns > 2: + for i in range(2,dataset.metadata.columns): + dataset.metadata.column_types[i] = 'str' + """ class Sabund( Otu ): file_ext = 'sabund' - + def __init__(self, **kwd): + Otu.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) def sniff( self, filename ): """ Determines whether the file is a otu (operational taxonomic unit) format @@ -108,33 +152,71 @@ class Rabund( Sabund ): file_ext = 'rabund' + def __init__(self, **kwd): + Sabund.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + Sabund.init_meta( self, dataset, copy_from=copy_from ) class GroupAbund( Otu ): file_ext = 'grpabund' + MetadataElement( name="groups", default=[], desc="Group Names", readonly=True, visible=True, no_value=[] ) + def __init__(self, **kwd): + Otu.__init__( self, **kwd ) + # self.column_names[0] = ['label'] + # self.column_names[1] = ['group'] + # self.column_names[2] = ['count'] + """ + def init_meta( self, dataset, copy_from=None ): + Otu.init_meta( self, dataset, copy_from=copy_from ) + """ def init_meta( self, dataset, copy_from=None ): Otu.init_meta( self, dataset, copy_from=copy_from ) def set_meta( self, dataset, overwrite = True, skip=1, max_data_lines = 100000, **kwd ): # See if file starts with header line if dataset.has_data(): + label_names = set() + group_names = set() + data_lines = 0 + comment_lines = 0 + ncols = 0 try: fh = open( dataset.file_name ) line = fh.readline() - line = line.strip() - linePieces = line.split('\t') - if linePieces[0] == 'label' and linePieces[1] == 'Group': + fields = line.strip().split('\t') + ncols = max(ncols,len(fields)) + if fields[0] == 'label' and fields[1] == 'Group': skip=1 + comment_lines += 1 else: skip=0 + data_lines += 1 + label_names.add(fields[0]) + group_names.add(fields[1]) + for line in fh: + data_lines += 1 + fields = line.strip().split('\t') + ncols = max(ncols,len(fields)) + label_names.add(fields[0]) + group_names.add(fields[1]) + # Set the discovered metadata values for the dataset + dataset.metadata.data_lines = data_lines + dataset.metadata.columns = ncols + dataset.metadata.labels = [] + dataset.metadata.labels += label_names + dataset.metadata.labels.sort() + dataset.metadata.groups = [] + dataset.metadata.groups += group_names + dataset.metadata.groups.sort() + dataset.metadata.skip = skip finally: fh.close() - Otu.set_meta( self, dataset, overwrite, skip, max_data_lines, **kwd) + def sniff( self, filename, vals_are_int=False): """ Determines whether the file is a otu (operational taxonomic unit) Shared format labelgroupcount[value(1..n)] The first line is column headings as of Mothur v 1.20 """ - log.info( "sniff GroupAbund vals_are_int %s" % vals_are_int) try: fh = open( filename ) count = 0 @@ -174,8 +256,10 @@ class SharedRabund( GroupAbund ): file_ext = 'shared' - - + def __init__(self, **kwd): + GroupAbund.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + GroupAbund.init_meta( self, dataset, copy_from=copy_from ) def sniff( self, filename ): """ Determines whether the file is a otu (operational taxonomic unit) Shared format @@ -184,13 +268,15 @@ """ # return GroupAbund.sniff(self,filename,True) isme = GroupAbund.sniff(self,filename,True) - log.info( "is SharedRabund %s" % isme) return isme class RelAbund( GroupAbund ): file_ext = 'relabund' - + def __init__(self, **kwd): + GroupAbund.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + GroupAbund.init_meta( self, dataset, copy_from=copy_from ) def sniff( self, filename ): """ Determines whether the file is a otu (operational taxonomic unit) Relative Abundance format @@ -199,7 +285,6 @@ """ # return GroupAbund.sniff(self,filename,False) isme = GroupAbund.sniff(self,filename,False) - log.info( "is RelAbund %s" % isme) return isme class SecondaryStructureMap(Tabular): @@ -346,8 +431,21 @@ class DistanceMatrix( Text ): file_ext = 'dist' """Add metadata elements""" - MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=False, optional=True, no_value=0 ) + MetadataElement( name="sequence_count", default=0, desc="Number of sequences", readonly=True, visible=True, optional=True, no_value='?' ) + + def init_meta( self, dataset, copy_from=None ): + Text.init_meta( self, dataset, copy_from=copy_from ) + def set_meta( self, dataset, overwrite = True, skip = 0, **kwd ): + Text.set_meta(self, dataset,overwrite = overwrite, skip = skip, **kwd ) + try: + fh = open( dataset.file_name ) + line = fh.readline().strip().strip() + dataset.metadata.sequence_count = int(line) + except Exception, e: + log.warn("DistanceMatrix set_meta %s" % e) + finally: + fh.close() class LowerTriangleDistanceMatrix(DistanceMatrix): file_ext = 'lower.dist' @@ -355,6 +453,9 @@ """Initialize secondary structure map datatype""" DistanceMatrix.__init__( self, **kwd ) + def init_meta( self, dataset, copy_from=None ): + DistanceMatrix.init_meta( self, dataset, copy_from=copy_from ) + def sniff( self, filename ): """ Determines whether the file is a lower-triangle distance matrix (phylip) format @@ -396,17 +497,13 @@ fh.close() return False -class SquareDistanceMatrix(DistanceMatrix,Tabular): +class SquareDistanceMatrix(DistanceMatrix): file_ext = 'square.dist' - sequence_count = -1 def __init__(self, **kwd): - """Initialize secondary structure map datatype""" - Tabular.__init__( self, **kwd ) + DistanceMatrix.__init__( self, **kwd ) def init_meta( self, dataset, copy_from=None ): - Text.init_meta( self, dataset, copy_from=copy_from ) - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - dataset.metadata.sequences = 0 + DistanceMatrix.init_meta( self, dataset, copy_from=copy_from ) def sniff( self, filename ): """ @@ -460,7 +557,8 @@ Tabular.__init__( self, **kwd ) self.column_names = ['Sequence','Sequence','Distance'] self.column_types = ['str','str','float'] - self.comment_lines = 1 + def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): + Tabular.set_meta(self, dataset,overwrite = overwrite, skip = skip, **kwd ) def sniff( self, filename ): """ @@ -522,19 +620,30 @@ class Group(Tabular): file_ext = 'groups' + MetadataElement( name="groups", default=[], desc="Group Names", readonly=True, visible=True, no_value=[] ) def __init__(self, **kwd): - """Name file shows the relationship between a representative sequence(col 1) and the sequences it represents(col 2)""" + """Group file assigns sequence (col 1) to a group (col 2)""" Tabular.__init__( self, **kwd ) self.column_names = ['name','group'] self.columns = 2 + def set_meta( self, dataset, overwrite = True, skip = None, max_data_lines = None, **kwd ): + Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines) + group_names = set() + try: + fh = open( dataset.file_name ) + for line in fh: + fields = line.strip().split('\t') + group_names.add(fields[1]) + dataset.metadata.groups = [] + dataset.metadata.groups += group_names + finally: + fh.close() -class Design(Tabular): +class Design(Group): file_ext = 'design' def __init__(self, **kwd): - """Name file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" - Tabular.__init__( self, **kwd ) - self.column_names = ['group','grouping'] - self.columns = 2 + """Design file shows the relationship between a group(col 1) and a grouping (col 2), providing a way to merge groups.""" + Group.__init__( self, **kwd ) class AccNos(Tabular): file_ext = 'accnos' @@ -634,8 +743,6 @@ Tabular.__init__( self, **kwd ) self.column_names = ['num','ten','twentyfive','fifty','seventyfive','ninetyfive','ninetynine'] self.column_types = ['int','float','float','float','float','float','float'] - def set_meta( self, dataset, overwrite = True, skip = None, **kwd ): - log.info( "Mothur Quantile set_meta %s" % kwd) def sniff( self, filename ): """ Determines whether the file is a quantiles tabular format for chimera analysis diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/bin.seqs.xml --- a/mothur/tools/mothur/bin.seqs.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/bin.seqs.xml Wed May 16 12:28:44 2012 -0500 @@ -24,9 +24,8 @@ - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/classify.otu.xml --- a/mothur/tools/mothur/classify.otu.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/classify.otu.xml Wed May 16 12:28:44 2012 -0500 @@ -67,9 +67,8 @@ - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/collect.shared.xml --- a/mothur/tools/mothur/collect.shared.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/collect.shared.xml Wed May 16 12:28:44 2012 -0500 @@ -26,20 +26,14 @@ To filter: select labels to include - - - - - + + To filter: select select at least 2 groups - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/collect.single.xml --- a/mothur/tools/mothur/collect.single.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/collect.single.xml Wed May 16 12:28:44 2012 -0500 @@ -37,10 +37,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/consensus.seqs.xml --- a/mothur/tools/mothur/consensus.seqs.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/consensus.seqs.xml Wed May 16 12:28:44 2012 -0500 @@ -35,9 +35,8 @@ - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/corr.axes.xml --- a/mothur/tools/mothur/corr.axes.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/corr.axes.xml Wed May 16 12:28:44 2012 -0500 @@ -38,19 +38,13 @@ - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/count.groups.xml --- a/mothur/tools/mothur/count.groups.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/count.groups.xml Wed May 16 12:28:44 2012 -0500 @@ -32,10 +32,8 @@ All groups displayed if none are selected. - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/count.seqs.xml --- a/mothur/tools/mothur/count.seqs.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/count.seqs.xml Wed May 16 12:28:44 2012 -0500 @@ -23,10 +23,8 @@ All groups displayed if none are selected. - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/dist.shared.xml --- a/mothur/tools/mothur/dist.shared.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/dist.shared.xml Wed May 16 12:28:44 2012 -0500 @@ -36,19 +36,13 @@ - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.groups.xml --- a/mothur/tools/mothur/get.groups.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.groups.xml Wed May 16 12:28:44 2012 -0500 @@ -53,10 +53,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.otulist.xml --- a/mothur/tools/mothur/get.otulist.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.otulist.xml Wed May 16 12:28:44 2012 -0500 @@ -22,9 +22,8 @@ - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.oturep.xml --- a/mothur/tools/mothur/get.oturep.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.oturep.xml Wed May 16 12:28:44 2012 -0500 @@ -65,18 +65,15 @@ - - - - + + - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.otus.xml --- a/mothur/tools/mothur/get.otus.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.otus.xml Wed May 16 12:28:44 2012 -0500 @@ -27,9 +27,8 @@ - - - + + @@ -40,10 +39,8 @@ At least one group must be selected - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.rabund.xml --- a/mothur/tools/mothur/get.rabund.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.rabund.xml Wed May 16 12:28:44 2012 -0500 @@ -18,9 +18,8 @@ - - - + + - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.sabund.xml --- a/mothur/tools/mothur/get.sabund.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.sabund.xml Wed May 16 12:28:44 2012 -0500 @@ -17,9 +17,8 @@ - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/get.sharedseqs.xml --- a/mothur/tools/mothur/get.sharedseqs.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/get.sharedseqs.xml Wed May 16 12:28:44 2012 -0500 @@ -39,9 +39,8 @@ - - - + + @@ -55,19 +54,15 @@ - - - - + + - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/heatmap.bin.xml --- a/mothur/tools/mothur/heatmap.bin.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/heatmap.bin.xml Wed May 16 12:28:44 2012 -0500 @@ -49,29 +49,21 @@ - - - - - + + - - - - - + + - - - - + + @@ -79,17 +71,13 @@ - - - - + + - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/heatmap.sim.xml --- a/mothur/tools/mothur/heatmap.sim.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/heatmap.sim.xml Wed May 16 12:28:44 2012 -0500 @@ -46,19 +46,13 @@ - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/indicator.xml --- a/mothur/tools/mothur/indicator.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/indicator.xml Wed May 16 12:28:44 2012 -0500 @@ -29,19 +29,13 @@ - - - - - + + - - - - - + + - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/make.design.xml --- a/mothur/tools/mothur/make.design.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/make.design.xml Wed May 16 12:28:44 2012 -0500 @@ -7,10 +7,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/make.shared.xml --- a/mothur/tools/mothur/make.shared.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/make.shared.xml Wed May 16 12:28:44 2012 -0500 @@ -29,16 +29,13 @@ - - - + + - - - - + + - - - + + - - - - - + + - - - - - + + - - - + + - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/summary.shared.xml --- a/mothur/tools/mothur/summary.shared.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/summary.shared.xml Wed May 16 12:28:44 2012 -0500 @@ -24,19 +24,13 @@ - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/summary.single.xml --- a/mothur/tools/mothur/summary.single.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/summary.single.xml Wed May 16 12:28:44 2012 -0500 @@ -37,10 +37,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/tree.shared.xml --- a/mothur/tools/mothur/tree.shared.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/tree.shared.xml Wed May 16 12:28:44 2012 -0500 @@ -53,19 +53,13 @@ - - - - - + + - - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/unifrac.unweighted.xml --- a/mothur/tools/mothur/unifrac.unweighted.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/unifrac.unweighted.xml Wed May 16 12:28:44 2012 -0500 @@ -31,10 +31,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/unifrac.weighted.xml --- a/mothur/tools/mothur/unifrac.weighted.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/unifrac.weighted.xml Wed May 16 12:28:44 2012 -0500 @@ -30,10 +30,8 @@ - - - - + + diff -r 09740be2bc9c -r bfbaf823be4c mothur/tools/mothur/venn.xml --- a/mothur/tools/mothur/venn.xml Wed Mar 28 15:45:03 2012 -0500 +++ b/mothur/tools/mothur/venn.xml Wed May 16 12:28:44 2012 -0500 @@ -52,19 +52,13 @@ - - - - - + + - - - - - + + @@ -79,9 +73,8 @@ - - - + +