changeset 3:f9e418125021 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit 66af14bc1642c1ca6ceb21f6018c8d665da890e8
author earlhaminst
date Fri, 28 Apr 2017 12:51:35 -0400
parents 0a33fd8ead70
children 02d73e6ca869
files hcluster_sg_parser.py hcluster_sg_parser.xml test-data/hcluster_sg1.txt test-data/hcluster_sg1_0_output.txt test-data/hcluster_sg1_1_output.txt test-data/hcluster_sg1_2_output.txt test-data/hcluster_sg1_3_output.txt test-data/hcluster_sg1_4_output.txt test-data/hcluster_sg1_5_output.txt test-data/hcluster_sg1_6_output.txt test-data/hcluster_sg1_7_output.txt test-data/hcluster_sg1_8_output.txt
diffstat 12 files changed, 130 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/hcluster_sg_parser.py	Fri Mar 24 12:33:12 2017 -0400
+++ b/hcluster_sg_parser.py	Fri Apr 28 12:51:35 2017 -0400
@@ -1,5 +1,5 @@
 """
-A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
+A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster.
 
 When a minimum and/or maximum number of cluster elements are specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
 
@@ -21,9 +21,10 @@
         with open(args[0]) as fh:
             for line in fh:
                 line = line.rstrip()
-                (cluster_id, n_ids, id_list) = line.split('\t')
-                n_ids = int(n_ids)
-                id_list = id_list.replace(',', '\n')
+                line_cols = line.split('\t')
+                cluster_id = line_cols[0]
+                n_ids = int(line_cols[-2])
+                id_list = line_cols[-1].replace(',', '\n')
                 if n_ids >= options.min and n_ids <= options.max:
                     outfile = cluster_id + '_output.txt'
                     with open(outfile, 'w') as f:
--- a/hcluster_sg_parser.xml	Fri Mar 24 12:33:12 2017 -0400
+++ b/hcluster_sg_parser.xml	Fri Apr 28 12:51:35 2017 -0400
@@ -13,7 +13,7 @@
 ]]>
     </command>
     <inputs>
-        <param name="inputFile" type="data" format="tabular" label="hcluster output file in 3-column format" help="3-columns format: cluster_id cluster-size cluster-members" />
+        <param name="inputFile" type="data" format="tabular" label="hcluster output file" help="Tab-separated format: cluster_id [other optional columns] cluster-size cluster-members" />
         <param name="min_elems" type="integer" value="" min="0" optional="true" label="Minimum number of cluster elements" />
         <param name="max_elems" type="integer" value="" min="2" optional="true" label="Maximum number of cluster elements" />
     </inputs>
@@ -43,10 +43,25 @@
             </output_collection>
             <output name="discarded" file="discarded.txt" />
         </test>
+        <test>
+            <param name="inputFile" ftype="tabular" value="hcluster_sg1.txt" />
+            <output_collection name="ids_lists" type="list">
+                <element name="0" file="hcluster_sg1_0_output.txt" ftype="txt" />
+                <element name="1" file="hcluster_sg1_1_output.txt" ftype="txt" />
+                <element name="2" file="hcluster_sg1_2_output.txt" ftype="txt" />
+                <element name="3" file="hcluster_sg1_3_output.txt" ftype="txt" />
+                <element name="4" file="hcluster_sg1_4_output.txt" ftype="txt" />
+                <element name="5" file="hcluster_sg1_5_output.txt" ftype="txt" />
+                <element name="6" file="hcluster_sg1_6_output.txt" ftype="txt" />
+                <element name="7" file="hcluster_sg1_7_output.txt" ftype="txt" />
+                <element name="8" file="hcluster_sg1_8_output.txt" ftype="txt" />
+            </output_collection>
+            <output name="discarded" file="empty.txt" />
+        </test>
     </tests>
     <help>
 <![CDATA[
-A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
+A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster.
 
 When a minimum and/or maximum number of cluster elements are specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
 ]]>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,9 @@
+0	0	57	1.000	0	13	11,0,4,3,5,12,7,10,13,2,1,6,9,
+1	0	23	0.671	0	46	45,39,71,83,49,61,64,38,25,72,44,29,90,35,30,28,62,65,63,88,89,34,82,73,52,74,17,55,41,56,59,51,32,33,87,31,85,27,48,66,50,37,60,77,26,54,
+2	0	0	1.000	0	1	84,
+3	0	33	1.000	0	8	101,96,92,98,94,100,91,99,
+4	0	28	1.000	0	4	40,43,76,42,
+5	0	0	1.000	0	1	8,
+6	0	54	1.000	0	8	18,23,21,15,22,20,14,16,
+7	0	15	0.596	0	17	24,95,58,69,78,36,68,70,57,67,97,79,53,47,75,86,46,
+8	0	0	1.000	0	1	102,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_0_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,13 @@
+11
+0
+4
+3
+5
+12
+7
+10
+13
+2
+1
+6
+9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_1_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,46 @@
+45
+39
+71
+83
+49
+61
+64
+38
+25
+72
+44
+29
+90
+35
+30
+28
+62
+65
+63
+88
+89
+34
+82
+73
+52
+74
+17
+55
+41
+56
+59
+51
+32
+33
+87
+31
+85
+27
+48
+66
+50
+37
+60
+77
+26
+54
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_2_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,1 @@
+84
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_3_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,8 @@
+101
+96
+92
+98
+94
+100
+91
+99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_4_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,4 @@
+40
+43
+76
+42
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_5_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,1 @@
+8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_6_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,8 @@
+18
+23
+21
+15
+22
+20
+14
+16
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_7_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,17 @@
+24
+95
+58
+69
+78
+36
+68
+70
+57
+67
+97
+79
+53
+47
+75
+86
+46
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_8_output.txt	Fri Apr 28 12:51:35 2017 -0400
@@ -0,0 +1,1 @@
+102