Mercurial > repos > iuc > scikit_bio_diversity_beta_diversity
view scikit_bio_diversity_beta_diversity.py @ 1:024a9b86f853 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scikit_bio commit bcfc8451fa622a6bf3aa31fa1e308c394feefb38
author | iuc |
---|---|
date | Mon, 14 Nov 2016 13:15:03 -0500 |
parents | 63706c95c9ed |
children |
line wrap: on
line source
#!/usr/bin/env python # Reports a beta diversity matrix for tabular input file # using scikit-bio # Daniel Blankenberg import sys import optparse import codecs from skbio.diversity import beta_diversity from skbio import TreeNode __VERSION__ = "0.0.1" DELIMITER = '\t' NEEDS_TREE = [ 'unweighted_unifrac', 'weighted_unifrac' ] NEEDS_OTU_NAMES = [ 'unweighted_unifrac', 'weighted_unifrac' ] def __main__(): parser = optparse.OptionParser( usage="%prog [options]" ) parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='print version and exit' ) parser.add_option( '-i', '--input', dest='input', action='store', type="string", default=None, help='Input abundance Filename' ) parser.add_option( '', '--otu_column', dest='otu_column', action='store', type="int", default=None, help='OTU ID Column (1 based)' ) parser.add_option( '', '--sample_columns', dest='sample_columns', action='store', type="string", default=None, help='Comma separated list of sample columns, unset to use all.' ) parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Abundance file has a header line' ) parser.add_option( '', '--distance_metric', dest='distance_metric', action='store', type="string", default=None, help='Distance metric to use' ) parser.add_option( '', '--tree', dest='tree', action='store', type="string", default=None, help='Newick Tree Filename' ) parser.add_option( '-o', '--output', dest='output', action='store', type="string", default=None, help='Output Filename' ) (options, args) = parser.parse_args() if options.version: print >> sys.stderr, "scikit-bio betadiversity from tabular file", __VERSION__ sys.exit() if options.otu_column is not None: otu_column = options.otu_column - 1 else: otu_column = None if options.sample_columns is None: with open( options.input, 'rb' ) as fh: line = fh.readline() columns = range( len( line.split( DELIMITER ) ) ) if otu_column in columns: columns.remove( otu_column ) else: columns = map( lambda x: int( x ) - 1, options.sample_columns.split( "," ) ) max_col = max( columns + [otu_column] ) counts = [ [] for x in columns ] sample_names = [] otu_names = [] with open( options.input, 'rb' ) as fh: if options.header: header = fh.readline().rstrip('\n\r').split( DELIMITER ) sample_names = [ header[i] for i in columns ] else: sample_names = [ "SAMPLE_%i" % x for x in range( len( columns ) ) ] for i, line in enumerate( fh ): fields = line.rstrip('\n\r').split( DELIMITER ) if len(fields) <= max_col: print >> sys.stederr, "Bad data line: ", fields continue if otu_column is not None: otu_names.append( fields[ otu_column ] ) else: otu_names.append( "OTU_%i" % i ) for j, col in enumerate( columns ): counts[ j ].append( int( fields[ col ] ) ) extra_kwds = {} if options.distance_metric in NEEDS_OTU_NAMES: extra_kwds['otu_ids'] = otu_names if options.distance_metric in NEEDS_TREE: assert options.tree, Exception( "You must provide a newick tree when using '%s'" % options.distance_metric ) # NB: TreeNode apparently needs unicode files with codecs.open( options.tree, 'rb', 'utf-8' ) as fh: extra_kwds['tree'] = TreeNode.read( fh ) bd_dm = beta_diversity( options.distance_metric, counts, ids=sample_names, **extra_kwds ) bd_dm.write( options.output ) if __name__ == "__main__": __main__()