Mercurial > repos > earlhaminst > hcluster_sg_parser

--- a/hcluster_sg_parser.py	Fri Apr 28 12:51:35 2017 -0400
+++ b/hcluster_sg_parser.py	Fri Jul 19 11:44:07 2019 -0400
@@ -17,20 +17,23 @@
     parser.add_option('-M', '--max', type='int', default=sys.maxsize, help='Maximum number of cluster elements')
     options, args = parser.parse_args()

-    with open(args[1], 'w') as discarded_out:
-        with open(args[0]) as fh:
-            for line in fh:
-                line = line.rstrip()
-                line_cols = line.split('\t')
-                cluster_id = line_cols[0]
-                n_ids = int(line_cols[-2])
-                id_list = line_cols[-1].replace(',', '\n')
-                if n_ids >= options.min and n_ids <= options.max:
-                    outfile = cluster_id + '_output.txt'
-                    with open(outfile, 'w') as f:
-                        f.write(id_list)
-                else:
-                    discarded_out.write(id_list)
+    with open(args[2], 'w') as discarded_max_out:
+        with open(args[1], 'w') as discarded_min_out:
+            with open(args[0]) as fh:
+                for line in fh:
+                    line = line.rstrip()
+                    line_cols = line.split('\t')
+                    cluster_id = line_cols[0]
+                    n_ids = int(line_cols[-2])
+                    id_list = line_cols[-1].replace(',', '\n')
+                    if n_ids < options.min:
+                        discarded_min_out.write(id_list)
+                    elif n_ids > options.max:
+                        discarded_max_out.write(id_list)
+                    else:
+                        outfile = cluster_id + '_output.txt'
+                        with open(outfile, 'w') as f:
+                            f.write(id_list)


 if __name__ == "__main__":
--- a/hcluster_sg_parser.xml	Fri Apr 28 12:51:35 2017 -0400
+++ b/hcluster_sg_parser.xml	Fri Jul 19 11:44:07 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="hcluster_sg_parser" name="hcluster_sg parser" version="0.2.0">
+<tool id="hcluster_sg_parser" name="hcluster_sg parser" version="0.2.1">
     <description>converts hcluster_sg 3-column output into lists of IDs</description>
     <command detect_errors="exit_code">
 <![CDATA[
@@ -9,7 +9,8 @@
 #if str($max_elems)
     -M $max_elems
 #end if
-'$discarded'
+'$discarded_min'
+'$discarded_max'
 ]]>
     </command>
     <inputs>
@@ -21,7 +22,8 @@
         <collection name="ids_lists" type="list" label="${tool.name} on ${on_string}">
             <discover_datasets pattern="(?P&lt;designation&gt;.+)_output\.txt" ext="txt" />
         </collection>
-        <data name="discarded" format="txt" label="${tool.name} on ${on_string}: discarded IDs" />
+        <data name="discarded_min" format="txt" label="${tool.name} on ${on_string}: discarded IDs less than specified" />
+        <data name="discarded_max" format="txt" label="${tool.name} on ${on_string}: discarded IDs more than specified" />
     </outputs>
     <tests>
         <test>
@@ -32,7 +34,8 @@
                 <element name="2" file="2_output.txt" ftype="txt" />
                 <element name="3" file="3_output.txt" ftype="txt" />
             </output_collection>
-            <output name="discarded" file="empty.txt" />
+            <output name="discarded_min" file="empty.txt" />
+            <output name="discarded_max" file="empty.txt" />
         </test>
         <test>
             <param name="inputFile" ftype="tabular" value="hcluster_sg.tabular" />
@@ -41,10 +44,11 @@
                 <element name="0" file="0_output.txt" ftype="txt" />
                 <element name="1" file="1_output.txt" ftype="txt" />
             </output_collection>
-            <output name="discarded" file="discarded.txt" />
+            <output name="discarded_min" file="discarded.txt" />
+            <output name="discarded_max" file="empty.txt" />
         </test>
         <test>
-            <param name="inputFile" ftype="tabular" value="hcluster_sg1.txt" />
+            <param name="inputFile" ftype="tabular" value="hcluster_sg1.tabular" />
             <output_collection name="ids_lists" type="list">
                 <element name="0" file="hcluster_sg1_0_output.txt" ftype="txt" />
                 <element name="1" file="hcluster_sg1_1_output.txt" ftype="txt" />
@@ -56,7 +60,8 @@
                 <element name="7" file="hcluster_sg1_7_output.txt" ftype="txt" />
                 <element name="8" file="hcluster_sg1_8_output.txt" ftype="txt" />
             </output_collection>
-            <output name="discarded" file="empty.txt" />
+            <output name="discarded_min" file="empty.txt" />
+            <output name="discarded_max" file="empty.txt" />
         </test>
     </tests>
     <help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hcluster_sg1.tabular	Fri Jul 19 11:44:07 2019 -0400
@@ -0,0 +1,9 @@
+0	0	57	1.000	0	13	11,0,4,3,5,12,7,10,13,2,1,6,9,
+1	0	23	0.671	0	46	45,39,71,83,49,61,64,38,25,72,44,29,90,35,30,28,62,65,63,88,89,34,82,73,52,74,17,55,41,56,59,51,32,33,87,31,85,27,48,66,50,37,60,77,26,54,
+2	0	0	1.000	0	1	84,
+3	0	33	1.000	0	8	101,96,92,98,94,100,91,99,
+4	0	28	1.000	0	4	40,43,76,42,
+5	0	0	1.000	0	1	8,
+6	0	54	1.000	0	8	18,23,21,15,22,20,14,16,
+7	0	15	0.596	0	17	24,95,58,69,78,36,68,70,57,67,97,79,53,47,75,86,46,
+8	0	0	1.000	0	1	102,
--- a/test-data/hcluster_sg1.txt	Fri Apr 28 12:51:35 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-0	0	57	1.000	0	13	11,0,4,3,5,12,7,10,13,2,1,6,9,
-1	0	23	0.671	0	46	45,39,71,83,49,61,64,38,25,72,44,29,90,35,30,28,62,65,63,88,89,34,82,73,52,74,17,55,41,56,59,51,32,33,87,31,85,27,48,66,50,37,60,77,26,54,
-2	0	0	1.000	0	1	84,
-3	0	33	1.000	0	8	101,96,92,98,94,100,91,99,
-4	0	28	1.000	0	4	40,43,76,42,
-5	0	0	1.000	0	1	8,
-6	0	54	1.000	0	8	18,23,21,15,22,20,14,16,
-7	0	15	0.596	0	17	24,95,58,69,78,36,68,70,57,67,97,79,53,47,75,86,46,
-8	0	0	1.000	0	1	102,