Mercurial > repos > earlhaminst > hcluster_sg_parser

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hcluster_sg_parser.pl	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+#
+use strict;
+use warnings;
+# A simple Perl parser that converts hcluster_sg 3-column output into lists of ids, one file per cluster
+# Usage: hcluster_sg_parser.pl <file>
+
+# Read the hcluster_sg table named on the command line; each row becomes <cluster_id>_output.txt.
+my $file1 = $ARGV[0];
+defined $file1 or die "Usage: $0 <hcluster_sg 3-column file>\n";
+open my $fh1, '<', $file1 or die "Could not open file '$file1' for reading: $!";
+while (my $line = <$fh1>) {
+    chomp $line;
+    next if $line !~ /\S/;    # blank lines carry no cluster
+    # Columns: cluster id, cluster size (unused), comma-separated member ids.
+    my ($cluster_id, undef, $id_list) = split /\t/, $line;
+    if (!defined $id_list) { warn "Skipping line $.: expected 3 tab-separated columns\n"; next; }
+    # Change commas to newlines so the output file holds one id per line.
+    $id_list =~ tr/,/\n/;
+    my $outfile = $cluster_id."_output.txt";
+    open(my $fh, '>', $outfile) or die "Could not open file '$outfile' for writing: $!";
+    print {$fh} $id_list or die "Could not write to file '$outfile': $!";
+    close $fh or die "Could not close file '$outfile': $!";    # buffered write errors surface here
+}
+close $fh1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hcluster_sg_parser.xml	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,35 @@
+<tool id="hcluster_sg_parser" name="hcluster_sg_parser" version="0.1.1">
+    <description>Converts hcluster_sg 3-column output into lists of ids</description>
+    <command>
+<![CDATA[
+perl '$__tool_directory__/hcluster_sg_parser.pl'
+'$inputFile'
+]]>
+    </command>
+    <inputs>
+        <param name="inputFile" type="data" format="tabular" label="hcluster output file in 3-column format" help="3-columns format: cluster_id cluster-size cluster-members" />
+    </inputs>
+    <outputs>
+        <collection name="ids_lists" type="list" label="${tool.name} on ${on_string}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)_output\.txt" ext="txt" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="inputFile" ftype="tabular" value="hcluster_sg.tabular" />
+            <output_collection name="ids_lists" type="list">
+                <element name="0" file="0_output.txt" ftype="txt" />
+                <element name="1" file="1_output.txt" ftype="txt" />
+                <element name="2" file="2_output.txt" ftype="txt" />
+                <element name="3" file="3_output.txt" ftype="txt" />
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+Splits hcluster_sg 3-column output (cluster id, cluster size, comma-separated cluster members) into a list collection with one dataset per cluster; each dataset is named after its cluster id and contains that cluster's member ids, one per line.
+]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/0_output.txt	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,20 @@
+90
+52
+76
+48
+88
+78
+31
+46
+4
+29
+6
+60
+80
+37
+33
+64
+66
+62
+42
+57
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1_output.txt	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,10 @@
+70
+21
+72
+84
+26
+86
+14
+10
+19
+53
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2_output.txt	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,5 @@
+74
+68
+2
+24
+58
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3_output.txt	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,5 @@
+82
+18
+9
+12
+39
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg.tabular	Mon Dec 12 07:12:23 2016 -0500
@@ -0,0 +1,4 @@
+0	20	90,52,76,48,88,78,31,46,4,29,6,60,80,37,33,64,66,62,42,57,
+1	10	70,21,72,84,26,86,14,10,19,53,
+2	5	74,68,2,24,58,
+3	5	82,18,9,12,39,