Repository 'hcluster_sg_parser'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/hcluster_sg_parser

Changeset 3:f9e418125021 (2017-04-28)
Previous changeset: 2:0a33fd8ead70 (2017-03-24) | Next changeset: 4:02d73e6ca869 (2019-07-19)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit 66af14bc1642c1ca6ceb21f6018c8d665da890e8
modified:
hcluster_sg_parser.py
hcluster_sg_parser.xml
added:
test-data/hcluster_sg1.txt
test-data/hcluster_sg1_0_output.txt
test-data/hcluster_sg1_1_output.txt
test-data/hcluster_sg1_2_output.txt
test-data/hcluster_sg1_3_output.txt
test-data/hcluster_sg1_4_output.txt
test-data/hcluster_sg1_5_output.txt
test-data/hcluster_sg1_6_output.txt
test-data/hcluster_sg1_7_output.txt
test-data/hcluster_sg1_8_output.txt
b
diff -r 0a33fd8ead70 -r f9e418125021 hcluster_sg_parser.py
--- a/hcluster_sg_parser.py Fri Mar 24 12:33:12 2017 -0400
+++ b/hcluster_sg_parser.py Fri Apr 28 12:51:35 2017 -0400
[
@@ -1,5 +1,5 @@
 """
-A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
+A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster.
 
 When a minimum and/or maximum number of cluster elements are specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
 
@@ -21,9 +21,10 @@
         with open(args[0]) as fh:
             for line in fh:
                 line = line.rstrip()
-                (cluster_id, n_ids, id_list) = line.split('\t')
-                n_ids = int(n_ids)
-                id_list = id_list.replace(',', '\n')
+                line_cols = line.split('\t')
+                cluster_id = line_cols[0]
+                n_ids = int(line_cols[-2])
+                id_list = line_cols[-1].replace(',', '\n')
                 if n_ids >= options.min and n_ids <= options.max:
                     outfile = cluster_id + '_output.txt'
                     with open(outfile, 'w') as f:
b
diff -r 0a33fd8ead70 -r f9e418125021 hcluster_sg_parser.xml
--- a/hcluster_sg_parser.xml Fri Mar 24 12:33:12 2017 -0400
+++ b/hcluster_sg_parser.xml Fri Apr 28 12:51:35 2017 -0400
[
@@ -13,7 +13,7 @@
 ]]>
     </command>
     <inputs>
-        <param name="inputFile" type="data" format="tabular" label="hcluster output file in 3-column format" help="3-columns format: cluster_id cluster-size cluster-members" />
+        <param name="inputFile" type="data" format="tabular" label="hcluster output file" help="Tab-separated format: cluster_id [other optional columns] cluster-size cluster-members" />
         <param name="min_elems" type="integer" value="" min="0" optional="true" label="Minimum number of cluster elements" />
         <param name="max_elems" type="integer" value="" min="2" optional="true" label="Maximum number of cluster elements" />
     </inputs>
@@ -43,10 +43,25 @@
             </output_collection>
             <output name="discarded" file="discarded.txt" />
         </test>
+        <test>
+            <param name="inputFile" ftype="tabular" value="hcluster_sg1.txt" />
+            <output_collection name="ids_lists" type="list">
+                <element name="0" file="hcluster_sg1_0_output.txt" ftype="txt" />
+                <element name="1" file="hcluster_sg1_1_output.txt" ftype="txt" />
+                <element name="2" file="hcluster_sg1_2_output.txt" ftype="txt" />
+                <element name="3" file="hcluster_sg1_3_output.txt" ftype="txt" />
+                <element name="4" file="hcluster_sg1_4_output.txt" ftype="txt" />
+                <element name="5" file="hcluster_sg1_5_output.txt" ftype="txt" />
+                <element name="6" file="hcluster_sg1_6_output.txt" ftype="txt" />
+                <element name="7" file="hcluster_sg1_7_output.txt" ftype="txt" />
+                <element name="8" file="hcluster_sg1_8_output.txt" ftype="txt" />
+            </output_collection>
+            <output name="discarded" file="empty.txt" />
+        </test>
     </tests>
     <help>
 <![CDATA[
-A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
+A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster.
 
 When a minimum and/or maximum number of cluster elements are specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
 ]]>
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,9 @@
+0 0 57 1.000 0 13 11,0,4,3,5,12,7,10,13,2,1,6,9,
+1 0 23 0.671 0 46 45,39,71,83,49,61,64,38,25,72,44,29,90,35,30,28,62,65,63,88,89,34,82,73,52,74,17,55,41,56,59,51,32,33,87,31,85,27,48,66,50,37,60,77,26,54,
+2 0 0 1.000 0 1 84,
+3 0 33 1.000 0 8 101,96,92,98,94,100,91,99,
+4 0 28 1.000 0 4 40,43,76,42,
+5 0 0 1.000 0 1 8,
+6 0 54 1.000 0 8 18,23,21,15,22,20,14,16,
+7 0 15 0.596 0 17 24,95,58,69,78,36,68,70,57,67,97,79,53,47,75,86,46,
+8 0 0 1.000 0 1 102,
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_0_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_0_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,13 @@
+11
+0
+4
+3
+5
+12
+7
+10
+13
+2
+1
+6
+9
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_1_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_1_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,46 @@
+45
+39
+71
+83
+49
+61
+64
+38
+25
+72
+44
+29
+90
+35
+30
+28
+62
+65
+63
+88
+89
+34
+82
+73
+52
+74
+17
+55
+41
+56
+59
+51
+32
+33
+87
+31
+85
+27
+48
+66
+50
+37
+60
+77
+26
+54
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_2_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_2_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,1 @@
+84
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_3_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_3_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,8 @@
+101
+96
+92
+98
+94
+100
+91
+99
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_4_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_4_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,4 @@
+40
+43
+76
+42
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_5_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_5_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,1 @@
+8
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_6_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_6_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,8 @@
+18
+23
+21
+15
+22
+20
+14
+16
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_7_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_7_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,17 @@
+24
+95
+58
+69
+78
+36
+68
+70
+57
+67
+97
+79
+53
+47
+75
+86
+46
b
diff -r 0a33fd8ead70 -r f9e418125021 test-data/hcluster_sg1_8_output.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1_8_output.txt Fri Apr 28 12:51:35 2017 -0400
b
@@ -0,0 +1,1 @@
+102