Repository 'format_cd_hit_output'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/format_cd_hit_output

Changeset 1:64da677bcee2 (2022-10-19)
Previous changeset 0:4015e9d6d277 (2016-04-26)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/format_cd_hit_output/ commit eea46077010e699403ce6995d7d4aac77b2e0b43"
modified:
format_cd_hit_output.py
format_cd_hit_output.xml
added:
test-data/categories.txt
test-data/sequences.txt
removed:
test-data/extract_category_distribution_output.tabular
b
diff -r 4015e9d6d277 -r 64da677bcee2 format_cd_hit_output.py
--- a/format_cd_hit_output.py Tue Apr 26 08:55:33 2016 -0400
+++ b/format_cd_hit_output.py Wed Oct 19 14:42:33 2022 +0000
[
b'@@ -1,43 +1,42 @@\n #!/usr/bin/env python\n # -*- coding: utf-8 -*-\n \n-import sys\n-import os\n import argparse\n-import copy\n-import operator\n-from sets import Set\n+\n \n def extract_mapping_info(input_mapping_filepath):\n     mapping_info = {}\n-    categories = Set([])\n+    categories = set([])\n \n-    with open(input_mapping_filepath,\'r\') as mapping_file:\n+    with open(input_mapping_filepath, \'r\') as mapping_file:\n         for line in mapping_file.readlines():\n             split_line = line[:-1].split(\'\\t\')\n-            mapping_info.setdefault(split_line[0],split_line[1])\n+            mapping_info.setdefault(split_line[0], split_line[1])\n             categories.add(split_line[1])\n \n     return mapping_info, categories\n \n-def init_category_distribution(categories = None):\n-    cluster_category_distribution = {}\n-    if categories != None:\n+\n+def init_category_distribution(categories=None):\n+    cluster_categ_distri = {}\n+    if categories is not None:\n         for category in categories:\n-            cluster_category_distribution[category] = 0\n-    return cluster_category_distribution\n+            cluster_categ_distri[category] = 0\n+    return cluster_categ_distri\n+\n \n-def flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster, \n-    cluster_category_distribution, categories, output_category_distribution_file, \n-    cluster_seq_number):\n+def flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,\n+                       cluster_categ_distri, categories,\n+                       output_category_distribution_file, cluster_seq_number):\n     if cluster_name != \'\':\n-        if categories != None:\n-            output_category_distribution_file.write(cluster_name)\n-            output_category_distribution_file.write(\'\\t\' + str(cluster_seq_number))\n+        if categories is not None:\n+            string = cluster_name\n+            string += \'\\t\' + str(cluster_seq_number)\n             for category in categories:\n-                output_category_distribution_file.write(\'\\t\')\n-                output_category_distribution_file.write(str(cluster_category_distribution[category]))\n-            output_category_distribution_file.write(\'\\n\')\n+                string += \'\\t\'\n+                string += str(cluster_categ_distri[category])\n+            string += \'\\n\'\n+            output_category_distribution_file.write(string)\n \n         if cluster_ref_seq == \'\':\n             string = "No reference sequence found for "\n@@ -46,11 +45,12 @@\n \n         ref_seq_cluster.setdefault(cluster_ref_seq, cluster_name)\n \n-def extract_cluster_info(args, mapping_info = None, categories = None):\n+\n+def extract_cluster_info(args, mapping_info=None, categories=None):\n     ref_seq_cluster = {}\n \n-    if args.output_category_distribution != None:\n-        if mapping_info == None or categories == None:\n+    if args.output_category_distribution is not None:\n+        if mapping_info is None or categories is None:\n             string = "A file with category distribution is expected but "\n             string += "no mapping information are available"\n             raise ValueError(string)\n@@ -63,19 +63,24 @@\n     else:\n         output_cat_distri_file = None\n \n-    with open(args.input_cluster_info,\'r\') as cluster_info_file:\n+    with open(args.input_cluster_info, \'r\') as cluster_info_file:\n         cluster_name = \'\'\n-        cluster_category_distribution = init_category_distribution(categories)\n+        cluster_categ_distri = init_category_distribution(categories)\n         cluster_ref_seq = \'\'\n         cluster_seq_number = 0\n         for line in cluster_info_file.readlines():\n             if line[0] == \'>\':\n-                flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster, \n-                    cluster_category_distribution, categories, \n-                    output_cat_distri_file, cluster_seq_number)\n+                flush_cluster_info(\n+                    cluster_name,\n+                    cluster_ref_seq,\n+                    ref_seq_clust'..b'   cluster_seq_number = 0\n             else:\n@@ -83,50 +88,58 @@\n                 seq_name = seq_info[1][1:-3]\n                 cluster_seq_number += 1\n \n-                if categories != None:\n+                if categories is not None:\n                     seq_count = 1\n-                    if args.number_sum != None:\n+                    if args.number_sum is not None:\n                         if seq_name.find(\'size\') != -1:\n                             substring = seq_name[seq_name.find(\'size\'):-1]\n                             seq_count = int(substring.split(\'=\')[1])\n-                    if not mapping_info.has_key(seq_name):\n+                    if seq_name not in mapping_info:\n                         string = seq_name + " not found in mapping"\n                         raise ValueError(string)\n                     category = mapping_info[seq_name]\n-                    cluster_category_distribution[category] += seq_count\n-                \n+                    cluster_categ_distri[category] += seq_count\n+\n                 if seq_info[-1] == \'*\':\n                     if cluster_ref_seq != \'\':\n                         string = "A reference sequence (" + cluster_ref_seq\n-                        string += ") already found for cluster " + cluster_name \n+                        string += ") already found for cluster " + cluster_name\n                         string += " (" + seq_name + ")"\n                         raise ValueError(string)\n                     cluster_ref_seq = seq_name\n \n-        flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster, \n-            cluster_category_distribution, categories, output_cat_distri_file, \n+        flush_cluster_info(\n+            cluster_name,\n+            cluster_ref_seq,\n+            ref_seq_cluster,\n+            cluster_categ_distri,\n+            categories,\n+            output_cat_distri_file,\n             cluster_seq_number)\n \n-    if args.output_category_distribution != None:\n+    if args.output_category_distribution is not None:\n         output_cat_distri_file.close()\n \n     return ref_seq_cluster\n \n+\n def rename_representative_sequences(args, ref_seq_cluster):\n-    with open(args.input_representative_sequences,\'r\') as input_sequences:\n-        with open(args.output_representative_sequences,\'w\') as output_sequences:\n+    with open(args.input_representative_sequences, \'r\') as input_sequences:\n+        with open(args.output_representative_sequences, \'w\') as output_seq:\n             for line in input_sequences.readlines():\n                 if line[0] == \'>\':\n                     seq_name = line[1:-1]\n-                    if not ref_seq_cluster.has_key(seq_name):\n+                    if seq_name not in ref_seq_cluster:\n                         string = seq_name + " not found as reference sequence"\n                         raise ValueError(string)\n-                    output_sequences.write(\'>\' + ref_seq_cluster[seq_name] + \'\\n\')\n+                    string = \'>\' + ref_seq_cluster[seq_name] + \'\\n\'\n+                    output_seq.write(string)\n                 else:\n-                    output_sequences.write(line)\n+                    output_seq.write(line)\n+\n \n def format_cd_hit_outputs(args):\n-    if args.input_mapping != None:\n+    if args.input_mapping is not None:\n         mapping_info, categories = extract_mapping_info(args.input_mapping)\n     else:\n         mapping_info = None\n@@ -134,9 +147,10 @@\n \n     ref_seq_cluster = extract_cluster_info(args, mapping_info, categories)\n \n-    if args.input_representative_sequences != None:\n+    if args.input_representative_sequences is not None:\n         rename_representative_sequences(args, ref_seq_cluster)\n \n+\n if __name__ == "__main__":\n     parser = argparse.ArgumentParser()\n     parser.add_argument(\'--input_cluster_info\', required=True)\n@@ -147,4 +161,4 @@\n     parser.add_argument(\'--number_sum\')\n     args = parser.parse_args()\n \n-    format_cd_hit_outputs(args)\n\\ No newline at end of file\n+    format_cd_hit_outputs(args)\n'
b
diff -r 4015e9d6d277 -r 64da677bcee2 format_cd_hit_output.xml
--- a/format_cd_hit_output.xml Tue Apr 26 08:55:33 2016 -0400
+++ b/format_cd_hit_output.xml Wed Oct 19 14:42:33 2022 +0000
b
@@ -1,4 +1,4 @@
-<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="1.0.0">
+<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="1.0.0+galaxy1">
     <description>to rename representative sequences with cluster name and/or extract distribution inside clusters given a mapping file</description>
 
     <requirements>
@@ -76,7 +76,13 @@
             <param name="input_mapping" value="input_mapping.txt"/>
             <param name="number_sum" value="true"/>
             <output name="output_representative_sequences" file="rename_representative_sequences_output.fasta"/>
-            <output name="output_category_distribution" file="extract_category_distribution_output.tabular"/>
+            <output name="output_category_distribution">
+                <assert_contents>
+                    <has_size value="139937" delta="100" />
+                    <has_n_lines n="2990"/>
+                    <has_text text="amoA_arc_19F-TV1-T0-b"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
 
@@ -101,4 +107,4 @@
     </help>
 
     <citations></citations>
-</tool>
\ No newline at end of file
+</tool>
b
diff -r 4015e9d6d277 -r 64da677bcee2 test-data/categories.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/categories.txt Wed Oct 19 14:42:33 2022 +0000
b
b'@@ -0,0 +1,2990 @@\n+Cluster\tSequence_number\tamoA_arc_19F-TV1-T0-d\tamoA_arc_19F-TV1-T1-a\tamoA_arc_19F-TV1-T0-a\tamoA_arc_19F-TEM1-T0-c\tamoA_arc_19F-TEM1-T1-a\tamoA_arc_19F-TEM1-T1-b\tamoA_arc_19F-TEM1-T1-d\tamoA_arc_19F-TEM1-T0-d\tamoA_arc_19F-TV1-T0-b\tamoA_arc_19F-TEM1-T1-c\tamoA_arc_19F-TV1-T1-b\tamoA_arc_19F-TEM1-T0-b\tamoA_arc_19F-TV1-T1-d\tamoA_arc_19F-TV1-T0-c\tamoA_arc_19F-TEM1-T0-a\tamoA_arc_19F-TV1-T1-c\n+Cluster_0\t135711\t18509\t16555\t8531\t8344\t15254\t19915\t23703\t16312\t17307\t16167\t20947\t10687\t24776\t17555\t10886\t29944\n+Cluster_1\t1104\t1579\t691\t2368\t1515\t284\t2140\t1427\t841\t1733\t663\t583\t3261\t1315\t5167\t250\t1647\n+Cluster_2\t26\t2\t0\t2\t2\t1\t0\t2\t1\t3\t2\t3\t2\t5\t0\t0\t4\n+Cluster_3\t5\t0\t0\t0\t1\t1\t0\t0\t0\t0\t2\t0\t0\t0\t1\t0\t0\n+Cluster_4\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_5\t3\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_6\t267\t20\t15\t16\t14\t9\t22\t20\t18\t14\t16\t25\t6\t26\t15\t9\t33\n+Cluster_7\t6308\t1764\t1451\t418\t922\t1404\t858\t2305\t1558\t515\t1686\t1949\t693\t2497\t550\t1100\t2893\n+Cluster_8\t3\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t1\n+Cluster_9\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_10\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_11\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_12\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_13\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_14\t3\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_15\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_16\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_17\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_18\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_19\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_20\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_21\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_22\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_23\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_24\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_25\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_26\t2\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_27\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_28\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_29\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_30\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_31\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_32\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_33\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_34\t11\t1\t2\t1\t1\t0\t0\t0\t2\t1\t0\t0\t1\t2\t0\t0\t0\n+Cluster_35\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_36\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_37\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_38\t2\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_39\t15\t0\t2\t1\t1\t1\t1\t1\t1\t1\t0\t1\t1\t1\t1\t0\t2\n+Cluster_40\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_41\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_42\t2\t0\t0\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_43\t12\t0\t1\t2\t0\t1\t1\t0\t0\t0\t2\t1\t2\t0\t0\t0\t2\n+Cluster_44\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_45\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_46\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_47\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_48\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_49\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_50\t3\t0\t1\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_51\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_52\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_53\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_54\t2\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_55\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_56\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_57\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_58\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_59\t2\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_60\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_61\t4\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\n+Cluster_62\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_63\t5\t0\t1\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t1\n+Cluster_64\t2\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_65\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_66\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_67\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_68\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_69\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_70\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_71\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_72\t2\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_73\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_74\t1\t0\t0\t1\t0\t0\t0'..b'\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2906\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2907\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2908\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2909\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2910\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2911\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2912\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2913\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2914\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2915\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2916\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2917\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2918\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2919\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2920\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2921\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2922\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2923\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2924\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2925\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2926\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2927\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2928\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2929\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2930\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2931\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2932\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_2933\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2934\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2935\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2936\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2937\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2938\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2939\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2940\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2941\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2942\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2943\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2944\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2945\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2946\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2947\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2948\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2949\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2950\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2951\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2952\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2953\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2954\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2955\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2956\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2957\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2958\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2959\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2960\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2961\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2962\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2963\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2964\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2965\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2966\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2967\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2968\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2969\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2970\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2971\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2972\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2973\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2974\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2975\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2976\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2977\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2978\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2979\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2980\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2981\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2982\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2983\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2984\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2985\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2986\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2987\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2988\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n'
b
diff -r 4015e9d6d277 -r 64da677bcee2 test-data/extract_category_distribution_output.tabular
--- a/test-data/extract_category_distribution_output.tabular Tue Apr 26 08:55:33 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,2990 +0,0 @@\n-Cluster\tSequence_number\tamoA_arc_19F-TEM1-T1-d\tamoA_arc_19F-TEM1-T1-b\tamoA_arc_19F-TEM1-T0-a\tamoA_arc_19F-TEM1-T0-b\tamoA_arc_19F-TEM1-T0-c\tamoA_arc_19F-TEM1-T0-d\tamoA_arc_19F-TEM1-T1-c\tamoA_arc_19F-TEM1-T1-a\tamoA_arc_19F-TV1-T1-d\tamoA_arc_19F-TV1-T1-a\tamoA_arc_19F-TV1-T1-b\tamoA_arc_19F-TV1-T1-c\tamoA_arc_19F-TV1-T0-a\tamoA_arc_19F-TV1-T0-c\tamoA_arc_19F-TV1-T0-b\tamoA_arc_19F-TV1-T0-d\n-Cluster_0\t135711\t23703\t19915\t10886\t10687\t8344\t16312\t16167\t15254\t24776\t16555\t20947\t29944\t8531\t17555\t17307\t18509\n-Cluster_1\t1104\t1427\t2140\t250\t3261\t1515\t841\t663\t284\t1315\t691\t583\t1647\t2368\t5167\t1733\t1579\n-Cluster_2\t26\t2\t0\t0\t2\t2\t1\t2\t1\t5\t0\t3\t4\t2\t0\t3\t2\n-Cluster_3\t5\t0\t0\t0\t0\t1\t0\t2\t1\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_4\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_5\t3\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_6\t267\t20\t22\t9\t6\t14\t18\t16\t9\t26\t15\t25\t33\t16\t15\t14\t20\n-Cluster_7\t6308\t2305\t858\t1100\t693\t922\t1558\t1686\t1404\t2497\t1451\t1949\t2893\t418\t550\t515\t1764\n-Cluster_8\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\n-Cluster_9\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n-Cluster_10\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_11\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n-Cluster_12\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n-Cluster_13\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_14\t3\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_15\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_16\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_17\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_18\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_19\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_20\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_21\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_22\t2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\n-Cluster_23\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_24\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_25\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_26\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_27\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_28\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_29\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_30\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_31\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_32\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_33\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_34\t11\t0\t0\t0\t1\t1\t2\t0\t0\t2\t2\t0\t0\t1\t0\t1\t1\n-Cluster_35\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_36\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_37\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_38\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_39\t15\t1\t1\t0\t1\t1\t1\t0\t1\t1\t2\t1\t2\t1\t1\t1\t0\n-Cluster_40\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_41\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_42\t2\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_43\t12\t0\t1\t0\t2\t0\t0\t2\t1\t0\t1\t1\t2\t2\t0\t0\t0\n-Cluster_44\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_45\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_46\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_47\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_48\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_49\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_50\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\n-Cluster_51\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_52\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_53\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_54\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_55\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_56\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_57\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_58\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_59\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\n-Cluster_60\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_61\t4\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t1\t0\t1\n-Cluster_62\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_63\t5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t1\t1\t1\t0\t0\n-Cluster_64\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\n-Cluster_65\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_66\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_67\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_68\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_69\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_70\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_71\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_72\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_73\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_74\t1\t0\t0\t0\t0\t0\t0'..b'\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2906\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2907\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_2908\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_2909\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_2910\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n-Cluster_2911\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2912\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2913\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2914\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2915\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2916\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2917\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n-Cluster_2918\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n-Cluster_2919\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_2920\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_2921\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_2922\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_2923\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_2924\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_2925\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2926\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2927\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2928\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2929\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2930\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2931\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2932\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2933\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2934\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2935\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2936\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2937\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2938\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2939\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2940\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2941\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2942\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2943\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2944\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2945\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2946\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2947\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2948\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2949\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2950\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2951\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2952\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2953\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2954\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2955\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2956\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2957\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2958\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2959\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2960\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_2961\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n-Cluster_2962\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2963\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n-Cluster_2964\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n-Cluster_2965\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n-Cluster_2966\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n-Cluster_2967\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n-Cluster_2968\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n-Cluster_2969\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2970\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2971\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2972\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2973\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2974\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2975\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2976\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2977\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2978\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2979\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2980\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2981\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2982\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n-Cluster_2983\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n-Cluster_2984\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n-Cluster_2985\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2986\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2987\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n-Cluster_2988\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n'
b
diff -r 4015e9d6d277 -r 64da677bcee2 test-data/sequences.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequences.txt Wed Oct 19 14:42:33 2022 +0000
b
b'@@ -0,0 +1,8974 @@\n+>Cluster_30\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsishstmlavgaiyytlftgvp\n+gtttyyapiltihtwvakgacfalasp\n+>Cluster_2051\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiysmlftglp\n+gpaafsapfmpiypwlaneacfalgy\n+>Cluster_31\n+tthylligvvavnstlltinavdyiiytdwawtsfgvfsisqstmlavgpiyymlfstsq\n+gtvtyyapimtiytwvvkgawfalgyp\n+>Cluster_32\n+ttqylfivvvdvnstlltihageyifytdwawtsfvvfsiskstmlavraiyyllftgvt\n+gtatyyvsimtiytwvakragvalgys\n+>Cluster_33\n+tthylfivvvavtstlltinagdylfytdwewssfvvfsisqstmlavgaiyyllftgvp\n+gtatyystimpiynwvakgawlalgyp\n+>Cluster_34\n+tthylfivvvavnstlltinagdyifytdwawtsfvvlsisqstmlavgaiyfmlftgvp\n+gtatyyatlmtittwvatgacfalgyr\n+>Cluster_35\n+tthylfivvvavnstlltikageyifytdwawasfvvfsifqstmlavgaiyfmlftgvp\n+gtdtyyatimtiytwfakgawfalvyp\n+>Cluster_36\n+tthylfivvvavnstlltinaadyifytdwactsfvvfsipqstmlpvgslytltspglp\n+gpatysatlmtiytwvakvawfaldsp\n+>Cluster_37\n+tthylfivvvavnstlltinagyyifytdsawtsfvvfsipqstmlavgaiyymlltgvp\n+gtatytatimtiytcvaqgswfafgyp\n+>Cluster_38\n+tthylfivvvvvnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+gtdtyydtmmtvytwvaqgawvalgyp\n+>Cluster_39\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymllqgvp\n+gnatyyatiitiytwvatgacfslgyp\n+>Cluster_40\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqssmlavgaiydllftgvp\n+gtdtyyptfktiytwvvkgalialvyp\n+>Cluster_41\n+tthylflvvvavnstlltinagdyifytdwawpsfvvfsisqstmlvvgaiyymiftgvp\n+gtatyyatimtiytllskgacfplgyp\n+>Cluster_42\n+tthylfivvvavnstlltlnagdyifytdwawtsfvvfsisqspmlavaalyymlfpgvp\n+vtatyyatiitiytwfakgagfalgyp\n+>Cluster_2052\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgsiyymlftgvp\n+gtapyyptiitiyiwvatgawfdlvy\n+>Cluster_2982\n+tnhdlfivvvavnstlltikagdyifytdwartsfvvfyifqstvltvgeiyymfftgvp\n+gnatyyatimtiytrvdkgaw\n+>Cluster_43\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+vtatyyatiitistlvatgewfalvys\n+>Cluster_44\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavglyymfftgvpg\n+tatyyaplmsiyiwvakgawfalgyiy\n+>Cluster_45\n+tthylfivvvavnstlltinagyyifytdwawtsfvvfsisqstmlvvgtihymlftgvp\n+vtspyyatimtlypwvatgawfalsyp\n+>Cluster_46\n+tthyifivivavnnklftinagdyifytdwawssfvvfsisqstmlavgaiyymlltgvp\n+giatyyatimtiytwvakvalialghp\n+>Cluster_47\n+tthylftvvvsvnstlltinagdyifytdwawtsfvvssisqstmlvfgaiyymlftgvp\n+gtatyyapiltiytwvakgacfasgyp\n+>Cluster_48\n+tthylfivlvavnstlltinagdyifytdwawtsfvvfsisqstmlsvgaiyymlftgip\n+gtatysatlmtiytwvakvacftlgyp\n+>Cluster_49\n+tthylfivvvainstlltintgeyifytdwawtsfvvfsisqstmiavgaisymqftglp\n+gtatyyatimtlytmvakgawfalvyp\n+>Cluster_50\n+tkhylfivvvavnstlltitagdyifytdwawtsfvvfsisqstmlavgtiyymmftgvt\n+gtatyyatimtiytlvakdawfafgyp\n+>Cluster_51\n+tthylfivvvavnstlltinagdyifytdwmwssfvvfsisqstmlvagaisymlftgfp\n+gtatyyatlmpiytwvakgewfslgyt\n+>Cluster_2630\n+tthylfivvvavhrtlltinagdyifytdwawtsfvvfsisqsrmvvggeiycmlvtgvp\n+gtatdyatsitiytwvvkaqwfalg\n+>Cluster_2631\n+tthylfivvvavnstlltinagdyifytdwiwssfvvfsisqhtmlvvgaiyymlftrvt\n+gtatyydtlmtvytwvdkgawfals\n+>Cluster_52\n+tthylfivvvavnsplltinagdyifytdwmwpsfvvfslfqstmlavaaiyymlftgvp\n+gtatyyspimtiytwvtkgawfalgyr\n+>Cluster_53\n+tthdlfivvvavnstlltikagdyifytdwawtsfgvfsisqstmltvgaiyymlfkvvp\n+gtatyyatimtiytlvakgawfafgyh\n+>Cluster_54\n+tthylfivvvavnstlltinagdyifytdwartsfvvfsisqstllvvgaiyymlftgvp\n+gtatyyenimtvytwvangagfafgyp\n+>Cluster_2053\n+tthywlivvvavnstlitinagdyifytdwawtsfvvfsisqstmiavgaiyymmftgvp\n+gtatyyatlmtiytwvakgagfalgy\n+>Cluster_2054\n+tthylfivvvavnntlltinagdyifytdwawtsfvvfsisqstmfavgaiyymlftgvp\n+etatysasimpiytwvakgawfafgy\n+>Cluster_2055\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyymlftgfl\n+gpppiyptslqvntgvakvhcfsley\n+>Cluster_2056\n+tthylfivvvavnstlltinaenyifytdwewtsfvvfsisqstmlvvgaiyymlstglp\n+gtakyyapfmtiytgvakgawfalgy\n+>Cluster_2057\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsvsqstrlvvgaidymlftgvp\n+gmatyyatimtintrvakgacfalvh\n+>Cluster_55\n+tthylfivvvavnstlltinagdyifytdwxwtiivvfliskltkfvvcaihfllfngvp\n+gtannyatiltnhtwvakgawfalgyp\n+>Cluster_56\n+tthylfivvvavnstlltinagdyifytdwawtsflvfsisqstmlvvgaiyymlftgvp\n+gpatyyspfitistwvpkgawfaleyp\n+>Cluster_57\n+tthylfigvvavnstlltinagdcifytdwgwtsfvvfsisqstmlavgavyyllfrgvp\n+gtatyyatlmtiytwvakaawfalvyt\n+>Cluster_58\n+tthylfigvva'..b'iiytwvakgawfalgyh\n+>Cluster_2040\n+tthylfivvvavnstlltinagdyifytdwmwssfvvfsisqstmlvvgaiyfmlftgfp\n+gtatyyatimtistwvakgalfafsyl\n+>Cluster_2041\n+tthylfivvvavtrtfltiipatfifktdwvwlsllffsfwnsrwlvlgqisyfpflxip\n+wtaaylsynlaifnwvgkgsmfsfgfp\n+>Cluster_2614\n+tthylfivvvavnstlltlnagdyifytdwawtsfvvfsishstmlvvaaiyymlftavp\n+vtvtyydtimtlyigvapgawfalgy\n+>Cluster_2615\n+tthylfivvvavnstlltinagdyifytdlawtsflvfsisqstmlavgaiyyvlltgvp\n+gtatsyptimtiyllvakgalvalgy\n+>Cluster_2616\n+tthylfivvvavnstllainpgdyifytewawtsfvvfsvskstmlvvgaiyymlftgvp\n+gtatysatlmtistwvakgagfalgy\n+>Cluster_2617\n+tthylfivvvavnstmltinagdyifctdcawtsfvvfsisqstmlvvgaiyymlftgvp\n+gtatyystimtiytwvakaalialay\n+>Cluster_2618\n+ttpylfivvvavnstlltinagdyifytdwvwtsfvvfsisqstmlvvgaiyymlftevp\n+gtvtyyativtiytwvaevvwfalry\n+>Cluster_2619\n+pthylfivvvavnsplltihagdyifytdwactsfvvfsisqstmlvvgaiyymlftvfp\n+gtatyyatimpiytvvskfacfalgy\n+>Cluster_2042\n+tthylfivvvavnstlltinagdyifytdwmwtsfvvfsisqstmlavgaisymlftvfp\n+gtatyyatimtiytwvvkggwfelgnr\n+>Cluster_2958\n+tphylfivvvavtstlitiiegdyifytdsawtsfvvfsisistmlvvvsilymlltrvp\n+gtatyyattltfytwvpigswla\n+>Cluster_2791\n+tthysfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyyllltgvp\n+gtttyyapimtiytwvaqgawfpfg\n+>Cluster_2905\n+tthyifivvvavnstlltikagdyifytdwawtsfvvfslyqsnmlvvsklyymlftglt\n+giatyyppfmiittwlangpwfal\n+>Cluster_2792\n+tthylfivvvavnsrlltinegdyifctdwawtsfvvfsvsqstmvvvgaiyymlltgvp\n+gtatyyatistiytgvakgalfslg\n+>Cluster_2043\n+tthnlfivvvavnstmitinagdyifytdwawtsfvvfsisqstmlvvlaiyymlftevp\n+gtakycaaimtiytwvdkgaccalgyp\n+>Cluster_2044\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqstlfvvgafysllstgfp\n+etetyfatirtiyiwvpkgacfalesq\n+>Cluster_2045\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqctmlavgaisymlftgvp\n+gtatysatimtistlvakgawlpfghh\n+>Cluster_2620\n+tthylfivvvavnstlltinageyifytdwawtsfvvfsipqstmvavgaiyymlftgvp\n+gtatyndtfmiiytwvykgacvgcgy\n+>Cluster_2621\n+tthylfivvvalnstlltinagdyifytdwewtsfvvssisqstmlvvgaiyymlltgvp\n+gtesyyatiltiytwvvngvelalgy\n+>Cluster_2793\n+tthylfivvvavdstlltinagdylfytdwawtsfavfsisqskmlvvgaiyyvlfpgva\n+gtdtyyatimtiytcvakgawfalg\n+>Cluster_2981\n+tthylfivvvavnstqltinagdyifytdwawtsfvvfsishstmpavgaryymlftgvp\n+gtatsyatimiiytwlakgawl\n+>Cluster_2906\n+tthylfifvfavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyyilftgvp\n+gtapyystfrtiytlvakgpwfal\n+>Cluster_2959\n+ttnylfivvvavnstlltinagdylfytdwawtsfvvfpisqstmlavgaiyymlltgdp\n+gtatynatiltmypwvatcawfa\n+>Cluster_2622\n+tthylfivvvavnstlltlnagdyifytdtawtlfvvfsisqstmlvvgaiyyvllpgvp\n+vtatyyattmtiymwvakgawfalgy\n+>Cluster_2623\n+tthylffvvvavtstlltinlgdyifyidwawssfvvfsvlqstmlvvgsiysmfftgvp\n+gtstyyatimtiytwfakgawfalgs\n+>Cluster_2794\n+ttpylfivvvavnrtlrtiiagdyifytdwawtslvvfsisqstmlavaaiyymlstgvp\n+vtatyyatiitiytwvakgawfalg\n+>Cluster_2795\n+tthylfivlvavsstlltiyagdyifytdwawtsflvfsisqstmlvvgaiyymlftgvp\n+gtatsyatimtiptslakaasfsla\n+>Cluster_2796\n+ttqylfivvvavnstlltinagdyifytdrmwssfvvvsisqstmlvvgaiyymlftgvp\n+gtatyhatimtvytcvakgawlvlc\n+>Cluster_2624\n+tthylfivvvavnstlltinagdyifytdwawtssevfsisqskrlvfgaffylfftglp\n+gpatfyaptmpsypwlpkglwfpley\n+>Cluster_2625\n+tthylfivvvavnstlltsnagdyifytdwawtsfvvlsisqskmlevgaiyymlftgvp\n+gtatayatiltiytlvdqgawfalgy\n+>Cluster_2626\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgplnsmpfpgvp\n+gtvtfyvtimqiytgvskgewfalgy\n+>Cluster_2627\n+ttnnlfivvvaanstlitinagdyifytdwmwssfvvfsitqstmlvvgaiyymlltgvp\n+gtatyydtimtiytwvakgawfalgy\n+>Cluster_2628\n+tthdlfivvvavnsplltinagdyilytdwactsfvvfslsqstmlvvgaiyymlftgvl\n+gtatyyatimtlstwvangsclvlgs\n+>Cluster_2629\n+tthylfivvvavnstlitinagdysfytdwmwssfvvfsisqstmvvvgaiyymlftggp\n+gtatyyatimtistwvakgagcalgy\n+>Cluster_2046\n+tthylfivvvavnstlltikagdyifytdwawtsfvvfsisqstmlvvgeiyyllftgvp\n+gtatdyapfitiypcvakvawfvlvyp\n+>Cluster_2047\n+tthylflvvvavnsklltinagdsilytdwawtsfvvfsisqstmlvvgaiyymiftgvt\n+gtgtyyatimtiypwvakgawfalgyp\n+>Cluster_2048\n+tthylfivvvavnstlltinagdyilytdwawtsfvvlsisqstmlavgaidyvlftevp\n+gtaayydtsmtiysgvakgdwlalggp\n+>Cluster_2049\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyympftgvp\n+gpatyfspfmttftwvvkgawfalgfp\n+>Cluster_2050\n+pthylfivvvavnstlltinagdyifytdwawtsflvfsisqstmlvvgaiyymlitgvp\n+gtatyyatfmtiytllakdsllslgyp\n'