# HG changeset patch
# User iuc
# Date 1474647458 14400
# Node ID 63706c95c9ed84bb71a72f21f45ecfa55a5e04b1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scikit_bio commit d46d41c5fec10407bd6b5cb77a11d9b43b82b95e
diff -r 000000000000 -r 63706c95c9ed macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,39 @@
+
+
+
+ scikit-bio
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0.4.2
+
+
+
+
+
+
+
+
+
+
+
+ @unpublished{scikit-bio:2016,
+ title = "scikit-bio",
+ author = "Contributors",
+ url = "http://scikit-bio.org/",
+ year = "2016 (accessed April 1, 2016)"
+ }
+
+
+
diff -r 000000000000 -r 63706c95c9ed scikit_bio_diversity_beta_diversity.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scikit_bio_diversity_beta_diversity.py Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+# Reports a beta diversity matrix for tabular input file
+# using scikit-bio
+# Daniel Blankenberg
+
+
+import sys
+import optparse
+import codecs
+from skbio.diversity import beta_diversity
+from skbio import TreeNode
+
+
+__VERSION__ = "0.0.1"  # reported by the -v/--version option
+
+DELIMITER = '\t'  # field separator used to split lines of the abundance file
+
+NEEDS_TREE = [ 'unweighted_unifrac', 'weighted_unifrac' ]  # metrics for which __main__ loads the newick tree
+
+NEEDS_OTU_NAMES = [ 'unweighted_unifrac', 'weighted_unifrac' ]  # metrics for which __main__ passes otu_ids
+
+
+def __main__():
+    """Compute a beta diversity matrix from a tabular abundance file and write it to the output file."""
+    parser = optparse.OptionParser( usage="%prog [options]" )
+    parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='print version and exit' )
+    parser.add_option( '-i', '--input', dest='input', action='store', type="string", default=None, help='Input abundance Filename' )
+    parser.add_option( '', '--otu_column', dest='otu_column', action='store', type="int", default=None, help='OTU ID Column (1 based)' )
+    parser.add_option( '', '--sample_columns', dest='sample_columns', action='store', type="string", default=None, help='Comma separated list of sample columns, unset to use all.' )
+    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Abundance file has a header line' )
+    parser.add_option( '', '--distance_metric', dest='distance_metric', action='store', type="string", default=None, help='Distance metric to use' )
+    parser.add_option( '', '--tree', dest='tree', action='store', type="string", default=None, help='Newick Tree Filename' )
+    parser.add_option( '-o', '--output', dest='output', action='store', type="string", default=None, help='Output Filename' )
+    (options, args) = parser.parse_args()
+    if options.version:
+        print >> sys.stderr, "scikit-bio betadiversity from tabular file", __VERSION__
+        sys.exit()
+
+    # Column numbers are 1-based on the command line; convert to a 0-based index.
+    otu_column = options.otu_column - 1 if options.otu_column is not None else None
+
+    if options.sample_columns is None:
+        # No explicit sample columns: use every column of the first line except the OTU ID column.
+        with open( options.input, 'rb' ) as fh:
+            line = fh.readline()
+            columns = range( len( line.split( DELIMITER ) ) )
+            if otu_column in columns:
+                columns.remove( otu_column )
+    else:
+        columns = map( lambda x: int( x ) - 1, options.sample_columns.split( "," ) )
+
+    max_col = max( columns + [otu_column] )
+    counts = [ [] for x in columns ]
+    otu_names = []
+    with open( options.input, 'rb' ) as fh:
+        if options.header:
+            header = fh.readline().rstrip('\n\r').split( DELIMITER )
+            sample_names = [ header[i] for i in columns ]
+        else:
+            sample_names = [ "SAMPLE_%i" % x for x in range( len( columns ) ) ]
+        for i, line in enumerate( fh ):
+            fields = line.rstrip('\n\r').split( DELIMITER )
+            if len(fields) <= max_col:
+                print >> sys.stderr, "Bad data line: ", fields
+                continue
+            if otu_column is not None:
+                otu_names.append( fields[ otu_column ] )
+            else:
+                otu_names.append( "OTU_%i" % i )
+            for j, col in enumerate( columns ):
+                counts[ j ].append( int( fields[ col ] ) )
+
+    extra_kwds = {}
+    if options.distance_metric in NEEDS_OTU_NAMES:
+        extra_kwds['otu_ids'] = otu_names
+    if options.distance_metric in NEEDS_TREE:
+        if not options.tree:
+            raise Exception( "You must provide a newick tree when using '%s'" % options.distance_metric )
+        # NB: TreeNode apparently needs unicode files
+        with codecs.open( options.tree, 'rb', 'utf-8' ) as fh:
+            extra_kwds['tree'] = TreeNode.read( fh )
+
+    bd_dm = beta_diversity( options.distance_metric, counts, ids=sample_names, **extra_kwds )
+    bd_dm.write( options.output )
+
+if __name__ == "__main__":
+ __main__()
diff -r 000000000000 -r 63706c95c9ed scikit_bio_diversity_beta_diversity.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scikit_bio_diversity_beta_diversity.xml Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,123 @@
+
+
+ using scikit-bio
+
+
+ macros.xml
+
+
+
+
+
+
+
+ #if str( $distance_metric.beta_diversity_method ) in [ 'unweighted_unifrac', 'weighted_unifrac' ] then $distance_metric.tree else ''#
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 63706c95c9ed test-data/input_abundance_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_abundance_1.tabular Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,86 @@
+#ID sample_one sample_two sample_3
+Crenarchaeota 0 0 1
+Euryarchaeota 0 1 0
+AC1 0 1 2
+AD3 1 1 4
+Acidobacteria 13 14 372
+Actinobacteria 16758 1443 101451
+AncK6 0 0 0
+Aquificae 1 0 12
+Armatimonadetes 4 7 13
+BHI80-139 0 0 8
+BRC1 1 5 9
+Bacteroidetes 5868 270336 13264
+CD12 0 0 0
+Caldiserica 0 0 2
+Caldithrix 0 0 0
+Chlamydiae 1 1 13
+Chlorobi 3 9 11
+Chloroflexi 31 21 463
+Chrysiogenetes 0 0 2
+Cyanobacteria 5 16 123
+Deferribacteres 0 1 1
+EM19 0 0 0
+EM3 0 0 0
+Elusimicrobia 4 4 3
+FBP 0 0 0
+FCPU426 0 0 2
+Fibrobacteres 4 9 24
+Firmicutes 136317 71445 302692
+Fusobacteria 1268 1636 5463
+GAL15 0 0 0
+GN01 0 0 4
+GN02 0 3 48
+GN04 2 6 3
+GOUTA4 0 1 0
+Gemmatimonadetes 1 4 46
+H-178 0 0 0
+Hyd24-12 0 0 0
+KSB3 0 0 11
+Kazan-3B-28 0 0 1
+LCP-89 0 0 0
+LD1 1 1 1
+Lentisphaerae 0 2 12
+MAT-CR-M4-B07 0 0 0
+MVP-21 0 0 0
+MVS-104 0 0 0
+NC10 0 0 0
+NKB19 4 11 17
+NPL-UPA2 0 0 0
+Nitrospirae 2 1 9
+OD1 1 3 19
+OP1 2 2 102
+OP11 0 0 15
+OP3 0 1 8
+OP8 1 0 9
+OP9 1 0 57
+OctSpA1-106 0 0 0
+PAUC34f 0 0 0
+Planctomycetes 16 7 131
+Poribacteria 0 0 0
+Proteobacteria 48361 12121 153808
+SAR406 1 2 7
+SBR1093 0 0 3
+SC4 0 0 2
+SR1 16 4 61
+Spirochaetes 6 11 184
+Synergistetes 2 2 13
+TA06 0 0 0
+TM6 0 2 4
+TM7 76 61 2210
+TPD-58 0 0 0
+Tenericutes 2 3 25
+Thermotogae 1 0 11
+VHS-B3-43 0 0 0
+Verrucomicrobia 55 1240 44
+WPS-2 1 0 0
+WS1 1 0 5
+WS2 0 0 2
+WS3 1 3 0
+WS4 0 0 0
+WS5 0 1 1
+WS6 0 0 1
+WWE1 0 0 7
+ZB3 0 0 2
+[Caldithrix] 3 2 4
+[Thermi] 1 1 22
diff -r 000000000000 -r 63706c95c9ed test-data/input_tree_1.newick
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_tree_1.newick Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,1 @@
+((Crenarchaeota:1.00000,Euryarchaeota:1.00000,Nanoarchaeota:1.00000,'[Parvarchaeota]':1.00000)Archaea:1.00000,(AC1:1.00000,AD3:1.00000,Acidobacteria:1.00000,Actinobacteria:1.00000,AncK6:1.00000,Aquificae:1.00000,Armatimonadetes:1.00000,BHI80-139:1.00000,BRC1:1.00000,Bacteroidetes:1.00000,CD12:1.00000,Caldiserica:1.00000,Caldithrix:1.00000,Chlamydiae:1.00000,Chlorobi:1.00000,Chloroflexi:1.00000,Chrysiogenetes:1.00000,Cyanobacteria:1.00000,Deferribacteres:1.00000,Dictyoglomi:1.00000,EM19:1.00000,EM3:1.00000,Elusimicrobia:1.00000,FBP:1.00000,FCPU426:1.00000,Fibrobacteres:1.00000,Firmicutes:1.00000,Fusobacteria:1.00000,GAL15:1.00000,GN01:1.00000,GN02:1.00000,GN04:1.00000,GOUTA4:1.00000,Gemmatimonadetes:1.00000,H-178:1.00000,Hyd24-12:1.00000,KSB3:1.00000,Kazan-3B-28:1.00000,LCP-89:1.00000,LD1:1.00000,Lentisphaerae:1.00000,MAT-CR-M4-B07:1.00000,MVP-21:1.00000,MVS-104:1.00000,NC10:1.00000,NKB19:1.00000,NPL-UPA2:1.00000,Nitrospirae:1.00000,OC31:1.00000,OD1:1.00000,OP1:1.00000,OP11:1.00000,OP3:1.00000,OP8:1.00000,OP9:1.00000,OctSpA1-106:1.00000,PAUC34f:1.00000,Planctomycetes:1.00000,Poribacteria:1.00000,Proteobacteria:1.00000,SAR406:1.00000,SBR1093:1.00000,SC4:1.00000,SR1:1.00000,Spirochaetes:1.00000,Synergistetes:1.00000,TA06:1.00000,TM6:1.00000,TM7:1.00000,TPD-58:1.00000,Tenericutes:1.00000,Thermotogae:1.00000,VHS-B3-43:1.00000,Verrucomicrobia:1.00000,WPS-2:1.00000,WS1:1.00000,WS2:1.00000,WS3:1.00000,WS4:1.00000,WS5:1.00000,WS6:1.00000,WWE1:1.00000,ZB3:1.00000,'[Caldithrix]':1.00000,'[Thermi]':1.00000)Bacteria:1.00000)root:1.00000;
diff -r 000000000000 -r 63706c95c9ed test-data/output_weighted_unifrac_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_weighted_unifrac_1.tabular Fri Sep 23 12:17:38 2016 -0400
@@ -0,0 +1,4 @@
+ sample_one sample_two sample_3
+sample_one 0.0 1.45881907807 0.274219368588
+sample_two 1.45881907807 0.0 1.46956460092
+sample_3 0.274219368588 1.46956460092 0.0