Repository 'format_cd_hit_output'
hg clone https://toolshed.g2.bx.psu.edu/repos/bebatut/format_cd_hit_output

Changeset 0:4015e9d6d277 (2016-04-26)
Next changeset 1:64da677bcee2 (2022-10-19)
Commit message:
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_cd_hit_output/ commit 975a480d80c774a1de58c8fc80b71ea44c5c702b-dirty
added:
format_cd_hit_output.py
format_cd_hit_output.xml
test-data/extract_category_distribution_only_output.tabular
test-data/extract_category_distribution_output.tabular
test-data/input_cluster_info.txt
test-data/input_mapping.txt
test-data/input_representative_sequences.fasta
test-data/rename_representative_sequences_output.fasta
b
diff -r 000000000000 -r 4015e9d6d277 format_cd_hit_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/format_cd_hit_output.py Tue Apr 26 08:55:33 2016 -0400
[
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import copy
+import operator
+from sets import Set
+
+def extract_mapping_info(input_mapping_filepath):
+    """Read the sequence-to-category mapping file.
+
+    The file is read as tab-separated text: the first column is used as the
+    sequence name and the second column as its category. Empty lines are
+    ignored.
+
+    Args:
+        input_mapping_filepath: path to the mapping file.
+
+    Returns:
+        A ``(mapping_info, categories)`` tuple. ``mapping_info`` is a dict
+        from sequence name to category (the first occurrence wins when a
+        name appears several times) and ``categories`` is the set of every
+        category found in the second column. The iteration order of
+        ``categories`` is arbitrary.
+
+    Raises:
+        ValueError: if a non-empty line has fewer than two columns.
+    """
+    mapping_info = {}
+    # Built-in set: the ``sets`` module is deprecated since Python 2.6 and
+    # does not exist any more in Python 3.
+    categories = set()
+
+    with open(input_mapping_filepath, 'r') as mapping_file:
+        for line in mapping_file:
+            # Strip only the end-of-line characters. Slicing with [:-1]
+            # would eat the last character of the category when the final
+            # line has no trailing newline.
+            line = line.rstrip('\r\n')
+            if not line:
+                continue
+            split_line = line.split('\t')
+            if len(split_line) < 2:
+                raise ValueError("Malformed mapping line: " + line)
+            mapping_info.setdefault(split_line[0], split_line[1])
+            categories.add(split_line[1])
+
+    return mapping_info, categories
+
+def init_category_distribution(categories=None):
+    """Return a fresh per-category counter for one cluster.
+
+    Args:
+        categories: iterable of category names, or None when no mapping
+            information is available.
+
+    Returns:
+        A new dict mapping every category to 0; an empty dict when
+        ``categories`` is None.
+    """
+    if categories is None:
+        return {}
+    # 0 is immutable, so sharing it between keys through fromkeys is safe.
+    return dict.fromkeys(categories, 0)
+
+def flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,
+    cluster_category_distribution, categories, output_category_distribution_file,
+    cluster_seq_number):
+    """Record a completed cluster and write its category distribution row.
+
+    Args:
+        cluster_name: name of the cluster; an empty string means that no
+            cluster has been read yet, in which case nothing is done.
+        cluster_ref_seq: name of the reference sequence of the cluster
+            (empty string when none was found).
+        ref_seq_cluster: dict from reference sequence name to cluster name,
+            updated in place (an already registered reference sequence keeps
+            its first cluster).
+        cluster_category_distribution: dict from category to the count
+            accumulated for this cluster.
+        categories: iterable of categories giving the column order, or None.
+        output_category_distribution_file: writable file object receiving the
+            tab-separated row, or None when no report is requested.
+        cluster_seq_number: number of sequences in the cluster.
+
+    Raises:
+        ValueError: if the cluster has no reference sequence.
+    """
+    if cluster_name == '':
+        return
+
+    # Categories can be known (a mapping file was given) although no report
+    # file was requested; only write when there is a file to write to.
+    if categories is not None and output_category_distribution_file is not None:
+        row = [cluster_name, str(cluster_seq_number)]
+        row.extend(
+            str(cluster_category_distribution[category])
+            for category in categories)
+        output_category_distribution_file.write('\t'.join(row) + '\n')
+
+    if cluster_ref_seq == '':
+        raise ValueError("No reference sequence found for " + cluster_name)
+
+    ref_seq_cluster.setdefault(cluster_ref_seq, cluster_name)
+
+def _sequence_size(seq_name):
+    """Return the count encoded as ``size=<N>`` in *seq_name*, or 1 if absent."""
+    start = seq_name.find('size=')
+    if start == -1:
+        return 1
+    digits = ''
+    for char in seq_name[start + len('size='):]:
+        if not char.isdigit():
+            break
+        digits += char
+    if digits == '':
+        return 1
+    return int(digits)
+
+
+def extract_cluster_info(args, mapping_info=None, categories=None):
+    """Parse the cluster information file and collect per-cluster data.
+
+    Lines starting with '>' open a new cluster (spaces in its name are
+    replaced by underscores). Every other non-empty line describes one
+    sequence of the current cluster: the text after the first tab is split
+    on spaces, the second token (minus its first character and its last
+    three characters) is the sequence name and a final '*' token marks the
+    reference sequence. This presumably matches the cd-hit ``.clstr``
+    layout -- confirm against the cd-hit documentation.
+
+    Args:
+        args: parsed arguments; ``input_cluster_info``,
+            ``output_category_distribution`` and ``number_sum`` are read.
+        mapping_info: dict from sequence name to category, or None.
+        categories: iterable of categories, or None.
+
+    Returns:
+        A dict from reference sequence name to cluster name.
+
+    Raises:
+        ValueError: if a category distribution is requested without mapping
+            information, if a sequence is missing from the mapping, if a
+            cluster has several reference sequences or none at all.
+    """
+    ref_seq_cluster = {}
+    output_cat_distri_file = None
+
+    if args.output_category_distribution is not None:
+        if mapping_info is None or categories is None:
+            string = "A file with category distribution is expected but "
+            string += "no mapping information are available"
+            raise ValueError(string)
+        output_cat_distri_file = open(args.output_category_distribution, 'w')
+
+    # try/finally: the report must be closed (and flushed) even when the
+    # cluster file turns out to be malformed.
+    try:
+        if output_cat_distri_file is not None:
+            header = ['Cluster', 'Sequence_number']
+            header.extend(categories)
+            output_cat_distri_file.write('\t'.join(header) + '\n')
+
+        with open(args.input_cluster_info, 'r') as cluster_info_file:
+            cluster_name = ''
+            cluster_category_distribution = init_category_distribution(categories)
+            cluster_ref_seq = ''
+            cluster_seq_number = 0
+            for line in cluster_info_file:
+                # Strip only end-of-line characters: slicing with [:-1]
+                # loses the '*' marker (or the end of a cluster name) on a
+                # last line without newline.
+                line = line.rstrip('\r\n')
+                if not line:
+                    continue
+
+                if line[0] == '>':
+                    # A new cluster starts: flush the one read so far.
+                    flush_cluster_info(cluster_name, cluster_ref_seq,
+                        ref_seq_cluster, cluster_category_distribution,
+                        categories, output_cat_distri_file, cluster_seq_number)
+                    cluster_name = line[1:].replace(' ', '_')
+                    cluster_category_distribution = init_category_distribution(categories)
+                    cluster_ref_seq = ''
+                    cluster_seq_number = 0
+                    continue
+
+                seq_info = line.split('\t')[1].split(' ')
+                # Drop the leading '>' and the three trailing characters.
+                seq_name = seq_info[1][1:-3]
+                cluster_seq_number += 1
+
+                if categories is not None:
+                    seq_count = 1
+                    if args.number_sum is not None:
+                        # Weight the sequence by the size found in its name.
+                        seq_count = _sequence_size(seq_name)
+                    if seq_name not in mapping_info:
+                        raise ValueError(seq_name + " not found in mapping")
+                    category = mapping_info[seq_name]
+                    cluster_category_distribution[category] += seq_count
+
+                if seq_info[-1] == '*':
+                    if cluster_ref_seq != '':
+                        string = "A reference sequence (" + cluster_ref_seq
+                        string += ") already found for cluster " + cluster_name
+                        string += " (" + seq_name + ")"
+                        raise ValueError(string)
+                    cluster_ref_seq = seq_name
+
+            # Flush the last cluster of the file.
+            flush_cluster_info(cluster_name, cluster_ref_seq, ref_seq_cluster,
+                cluster_category_distribution, categories,
+                output_cat_distri_file, cluster_seq_number)
+    finally:
+        if output_cat_distri_file is not None:
+            output_cat_distri_file.close()
+
+    return ref_seq_cluster
+
+def rename_representative_sequences(args, ref_seq_cluster):
+    """Copy the representative sequences, renaming each one after its cluster.
+
+    Every line starting with '>' is replaced by '>' followed by the cluster
+    name registered for that sequence; all other lines are copied unchanged.
+
+    Args:
+        args: parsed arguments; ``input_representative_sequences`` and
+            ``output_representative_sequences`` are read as file paths.
+        ref_seq_cluster: dict from reference sequence name to cluster name.
+
+    Raises:
+        ValueError: if a sequence name is not a key of ``ref_seq_cluster``.
+    """
+    with open(args.input_representative_sequences, 'r') as input_sequences:
+        with open(args.output_representative_sequences, 'w') as output_sequences:
+            for line in input_sequences:
+                if not line.startswith('>'):
+                    output_sequences.write(line)
+                    continue
+                # Strip only end-of-line characters: slicing with [1:-1]
+                # truncates the name on a last line without newline.
+                seq_name = line[1:].rstrip('\r\n')
+                if seq_name not in ref_seq_cluster:
+                    raise ValueError(seq_name + " not found as reference sequence")
+                output_sequences.write('>' + ref_seq_cluster[seq_name] + '\n')
+
+def format_cd_hit_outputs(args):
+    """Run the formatting steps selected by the parsed arguments.
+
+    The mapping file is loaded when ``args.input_mapping`` is given, the
+    cluster information file is always parsed, and the representative
+    sequences are renamed when ``args.input_representative_sequences`` is
+    given.
+
+    Args:
+        args: parsed command-line arguments.
+
+    Raises:
+        ValueError: if representative sequences are given without an output
+            path for the renamed sequences.
+    """
+    # Fail before doing any work rather than with an obscure open(None)
+    # error once the cluster file has already been processed.
+    if (args.input_representative_sequences is not None
+            and args.output_representative_sequences is None):
+        string = "Representative sequences are given but "
+        string += "no output file is available for the renamed sequences"
+        raise ValueError(string)
+
+    mapping_info = None
+    categories = None
+    if args.input_mapping is not None:
+        mapping_info, categories = extract_mapping_info(args.input_mapping)
+
+    ref_seq_cluster = extract_cluster_info(args, mapping_info, categories)
+
+    if args.input_representative_sequences is not None:
+        rename_representative_sequences(args, ref_seq_cluster)
+
+if __name__ == "__main__":
+    # Command-line entry point: only the cluster information file is
+    # mandatory, every other option takes one value and defaults to None.
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--input_cluster_info', required=True)
+    for optional_flag in (
+            '--input_representative_sequences',
+            '--output_representative_sequences',
+            '--input_mapping',
+            '--output_category_distribution',
+            '--number_sum'):
+        cli.add_argument(optional_flag)
+
+    format_cd_hit_outputs(cli.parse_args())
\ No newline at end of file
b
diff -r 000000000000 -r 4015e9d6d277 format_cd_hit_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/format_cd_hit_output.xml Tue Apr 26 08:55:33 2016 -0400
[
@@ -0,0 +1,104 @@
+<tool id="format_cd_hit_output" name="Format cd-hit outputs" version="1.0.0">
+    <description>to rename representative sequences with cluster name and/or extract distribution inside clusters given a mapping file</description>
+
+    <requirements>
+    </requirements>
+
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <version_command>
+    </version_command>
+
+    <command><![CDATA[
+        python $__tool_directory__/format_cd_hit_output.py
+            --input_cluster_info $input_cluster_info
+
+            #if $rename_repr_seq.rename_repr_seq_test == "true"
+                --input_representative_sequences $rename_repr_seq.input_representative_sequences
+                --output_representative_sequences $output_representative_sequences
+            #end if
+
+            #if $extract_cat_distri.extract_cat_distri_test == "true"
+                --input_mapping $extract_cat_distri.input_mapping
+                --output_category_distribution $output_category_distribution
+                $extract_cat_distri.number_sum
+            #end if
+    ]]>
+    </command>
+
+    <inputs>
+        <param type="data" format="txt" name="input_cluster_info" label="Cluster info" help="(--input_cluster_info)"/>
+        <conditional name="rename_repr_seq">
+            <param name='rename_repr_seq_test' type="select" label="Rename representative sequences with the  corresponding cluster name?" help="">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+            <when value="true">
+                <param type="data" format="fasta" 
+                    name="input_representative_sequences"
+                    label="Representative sequences" help="(--input_representative_sequences)"/>
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <conditional name="extract_cat_distri">
+            <param name='extract_cat_distri_test' type="select" label="Extract category distribution of each cluster?" help="">
+                <option value="true" selected="true">Yes</option>
+                <option value="false">No</option>
+            </param>
+            <when value="true">
+                <param type="data" format="tabular,tsv,csv" name="input_mapping" label="Mapping file" help="The mapping file is a tabular file with 2 columns. The first column contains the sequence names and the second one the corresponding category (--input_mapping)"/>
+                <param name='number_sum' type='boolean' checked="true" truevalue="--number_sum 1" falsevalue="" label="Sum sequence number for each category?" help="The alternative is the sum of size for sequences in each category (if the size information is available in sequence name, --number_sum)"/>
+            </when>
+            <when value="false" />
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_representative_sequences" format="fasta"
+            label="${tool.name} on ${on_string}: Renamed representative sequences">
+            <filter>((rename_repr_seq['rename_repr_seq_test'] == "true"))</filter>
+        </data>
+        <data name="output_category_distribution" format="tabular" 
+            label="${tool.name} on ${on_string}: Category distribution">
+            <filter>((extract_cat_distri['extract_cat_distri_test'] == "true"))</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_cluster_info" value="input_cluster_info.txt"/>
+            <param name="rename_repr_seq_test" value="true"/>
+            <param name="input_representative_sequences" value="input_representative_sequences.fasta"/>
+            <param name="extract_cat_distri_test" value="true"/>
+            <param name="input_mapping" value="input_mapping.txt"/>
+            <param name="number_sum" value="true"/>
+            <output name="output_representative_sequences" file="rename_representative_sequences_output.fasta"/>
+            <output name="output_category_distribution" file="extract_category_distribution_output.tabular"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+This tool formats cd-hit outputs (cluster information and cluster representative sequences) to rename representative sequences with the cluster name and/or extract the category distribution inside clusters given a mapping file.
+
+The tool takes as input:
+
+ - The cd-hit output file with cluster information
+ - The cd-hit output file with representative sequences for each cluster (optional)
+ - A mapping file in tabular format with first column being the sequence names (corresponding to the ones in cluster information file) and the second column being the corresponding categories (optional)
+
+The tool generates different outputs given chosen parameters:
+
+ - A file with representative sequences of each cluster named with the cluster name
+ - A tabular file with lines corresponding to clusters, columns to categories (and one column with the number of sequences in the cluster), and cells containing the number of sequences of the given category in the cluster
+
+]]>
+    </help>
+
+    <citations></citations>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r 4015e9d6d277 test-data/extract_category_distribution_only_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_category_distribution_only_output.tabular Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,2990 @@\n+Cluster\tSequence_number\tamoA_arc_19F-TEM1-T1-d\tamoA_arc_19F-TEM1-T1-b\tamoA_arc_19F-TEM1-T0-a\tamoA_arc_19F-TEM1-T0-b\tamoA_arc_19F-TEM1-T0-c\tamoA_arc_19F-TEM1-T0-d\tamoA_arc_19F-TEM1-T1-c\tamoA_arc_19F-TEM1-T1-a\tamoA_arc_19F-TV1-T1-d\tamoA_arc_19F-TV1-T1-a\tamoA_arc_19F-TV1-T1-b\tamoA_arc_19F-TV1-T1-c\tamoA_arc_19F-TV1-T0-a\tamoA_arc_19F-TV1-T0-c\tamoA_arc_19F-TV1-T0-b\tamoA_arc_19F-TV1-T0-d\n+Cluster_0\t135711\t11106\t9607\t5440\t6231\t4819\t7999\t7744\t7236\t11746\t8210\t9933\t13995\t5051\t9510\t8140\t8944\n+Cluster_1\t1104\t79\t69\t35\t47\t34\t63\t53\t56\t112\t70\t78\t128\t46\t84\t93\t57\n+Cluster_2\t26\t1\t0\t0\t2\t2\t1\t2\t1\t4\t0\t3\t4\t2\t0\t3\t1\n+Cluster_3\t5\t0\t0\t0\t0\t1\t0\t2\t1\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_4\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_5\t3\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_6\t267\t20\t21\t9\t6\t12\t18\t14\t9\t25\t14\t25\t30\t15\t15\t14\t20\n+Cluster_7\t6308\t503\t470\t318\t355\t240\t392\t415\t293\t571\t281\t427\t577\t226\t412\t392\t436\n+Cluster_8\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\n+Cluster_9\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_10\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_11\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_12\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_13\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_14\t3\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_15\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_16\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_17\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_18\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_19\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_20\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_21\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_22\t2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t
0\t0\t0\t0\t0\n+Cluster_23\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_24\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_25\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_26\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_27\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_28\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_29\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_30\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_31\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_32\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_33\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_34\t11\t0\t0\t0\t1\t1\t2\t0\t0\t2\t2\t0\t0\t1\t0\t1\t1\n+Cluster_35\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_36\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_37\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_38\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_39\t15\t1\t1\t0\t1\t1\t1\t0\t1\t1\t2\t1\t2\t1\t1\t1\t0\n+Cluster_40\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_41\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_42\t2\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_43\t12\t0\t1\t0\t2\t0\t0\t2\t1\t0\t1\t1\t2\t2\t0\t0\t0\n+Cluster_44\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_45\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_46\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_47\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_48\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_49\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_50\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\n+Cluster_51\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_52\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_53\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0
\n+Cluster_54\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_55\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_56\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_57\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_58\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_59\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\n+Cluster_60\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_61\t4\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t1\t0\t1\n+Cluster_62\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_63\t5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t1\t1\t1\t0\t0\n+Cluster_64\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\n+Cluster_65\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_66\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_67\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_68\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_69\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_70\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_71\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_72\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_73\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_74\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_75\t1\t0\t0\t0\t0\t0\t'..b'\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2906\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2907\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2908\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2909\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2910\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_2911\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2912\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2913\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2914\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+
Cluster_2915\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2916\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2917\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2918\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2919\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2920\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2921\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2922\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2923\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2924\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2925\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2926\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2927\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2928\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2929\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2930\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2931\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2932\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2933\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2934\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2935\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2936\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2937\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2938\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2939\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2940\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2941\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2942\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2943\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2944\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2945\t1\t0\t
0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2946\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2947\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2948\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2949\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2950\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2951\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2952\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2953\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2954\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2955\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2956\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2957\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2958\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2959\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2960\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2961\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_2962\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2963\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2964\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2965\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2966\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2967\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2968\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2969\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2970\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2971\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2972\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2973\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2974\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2975\t1\t0\t1\t0\t0\t0\t0\t0\t0\
t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2976\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2977\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2978\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2979\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2980\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2981\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2982\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2983\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2984\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2985\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2986\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2987\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2988\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n'
b
diff -r 000000000000 -r 4015e9d6d277 test-data/extract_category_distribution_output.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/extract_category_distribution_output.tabular Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,2990 @@\n+Cluster\tSequence_number\tamoA_arc_19F-TEM1-T1-d\tamoA_arc_19F-TEM1-T1-b\tamoA_arc_19F-TEM1-T0-a\tamoA_arc_19F-TEM1-T0-b\tamoA_arc_19F-TEM1-T0-c\tamoA_arc_19F-TEM1-T0-d\tamoA_arc_19F-TEM1-T1-c\tamoA_arc_19F-TEM1-T1-a\tamoA_arc_19F-TV1-T1-d\tamoA_arc_19F-TV1-T1-a\tamoA_arc_19F-TV1-T1-b\tamoA_arc_19F-TV1-T1-c\tamoA_arc_19F-TV1-T0-a\tamoA_arc_19F-TV1-T0-c\tamoA_arc_19F-TV1-T0-b\tamoA_arc_19F-TV1-T0-d\n+Cluster_0\t135711\t23703\t19915\t10886\t10687\t8344\t16312\t16167\t15254\t24776\t16555\t20947\t29944\t8531\t17555\t17307\t18509\n+Cluster_1\t1104\t1427\t2140\t250\t3261\t1515\t841\t663\t284\t1315\t691\t583\t1647\t2368\t5167\t1733\t1579\n+Cluster_2\t26\t2\t0\t0\t2\t2\t1\t2\t1\t5\t0\t3\t4\t2\t0\t3\t2\n+Cluster_3\t5\t0\t0\t0\t0\t1\t0\t2\t1\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_4\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_5\t3\t1\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_6\t267\t20\t22\t9\t6\t14\t18\t16\t9\t26\t15\t25\t33\t16\t15\t14\t20\n+Cluster_7\t6308\t2305\t858\t1100\t693\t922\t1558\t1686\t1404\t2497\t1451\t1949\t2893\t418\t550\t515\t1764\n+Cluster_8\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\n+Cluster_9\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_10\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_11\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_12\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_13\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_14\t3\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_15\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_16\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_17\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_18\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_19\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_20\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_21\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Clu
ster_22\t2\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\t0\t0\t0\t0\n+Cluster_23\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_24\t1\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_25\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_26\t2\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_27\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_28\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_29\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_30\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_31\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_32\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_33\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_34\t11\t0\t0\t0\t1\t1\t2\t0\t0\t2\t2\t0\t0\t1\t0\t1\t1\n+Cluster_35\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_36\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_37\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_38\t2\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_39\t15\t1\t1\t0\t1\t1\t1\t0\t1\t1\t2\t1\t2\t1\t1\t1\t0\n+Cluster_40\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_41\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_42\t2\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_43\t12\t0\t1\t0\t2\t0\t0\t2\t1\t0\t1\t1\t2\t2\t0\t0\t0\n+Cluster_44\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_45\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_46\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_47\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_48\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_49\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_50\t3\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t1\t0\t0\t0\n+Cluster_51\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_52\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_53\t1\t0
\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_54\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_55\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_56\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_57\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_58\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_59\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t0\t0\t0\n+Cluster_60\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_61\t4\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t1\t1\t0\t1\n+Cluster_62\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_63\t5\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t1\t1\t1\t0\t0\n+Cluster_64\t2\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t1\t0\t0\n+Cluster_65\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_66\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_67\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_68\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_69\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_70\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_71\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_72\t2\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_73\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_74\t1\t0\t0\t0\t0\t0\t0'..b'\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2906\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2907\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2908\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2909\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2910\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_2911\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2912\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2913\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2914\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2915\t1\t0
\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2916\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2917\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2918\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2919\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2920\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2921\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2922\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2923\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2924\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2925\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2926\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2927\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2928\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2929\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2930\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2931\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2932\t1\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2933\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2934\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2935\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2936\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2937\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2938\t1\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2939\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2940\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2941\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2942\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2943\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2944\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2945\t1\t0\t0\t0\t0\t0\t0\t0\t
1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2946\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2947\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2948\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2949\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2950\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2951\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2952\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2953\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2954\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2955\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2956\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2957\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2958\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2959\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2960\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2961\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\n+Cluster_2962\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2963\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\n+Cluster_2964\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2965\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2966\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2967\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\n+Cluster_2968\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\n+Cluster_2969\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2970\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2971\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2972\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2973\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2974\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2975\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\
t0\t0\n+Cluster_2976\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2977\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2978\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2979\t1\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2980\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2981\t1\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2982\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\n+Cluster_2983\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\n+Cluster_2984\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\n+Cluster_2985\t1\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2986\t1\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2987\t1\t0\t0\t0\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\n+Cluster_2988\t1\t0\t0\t0\t0\t0\t1\t0\t0\t0\t0\t0\t0\t0\t0\t0\t0\n'
b
diff -r 000000000000 -r 4015e9d6d277 test-data/input_cluster_info.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_cluster_info.txt Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,150073 @@\n+>Cluster 0\n+0\t87aa, >M00987:61:000000000-A8R49:1:2112:17537:2141;size=366;... at 1:87:1:87/98.85%\n+1\t87aa, >M00987:61:000000000-A8R49:1:1110:12700:1843;size=236;... at 1:87:1:87/98.85%\n+2\t87aa, >M00987:61:000000000-A8R49:1:2105:13510:5899;size=171;... at 1:87:1:87/98.85%\n+3\t87aa, >M00987:61:000000000-A8R49:1:2105:13866:12140;size=140;... at 1:87:1:87/98.85%\n+4\t87aa, >M00987:61:000000000-A8R49:1:1113:22826:20271;size=118;... at 1:87:1:87/98.85%\n+5\t87aa, >M00987:61:000000000-A8R49:1:2111:20895:21768;size=100;... at 1:87:1:87/98.85%\n+6\t87aa, >M00987:61:000000000-A8R49:1:1110:17065:16689;size=85;... at 1:87:1:87/98.85%\n+7\t87aa, >M00987:61:000000000-A8R49:1:1109:16753:15539;size=84;... at 1:87:1:87/98.85%\n+8\t87aa, >M00987:61:000000000-A8R49:1:2104:20140:17213;size=78;... at 1:87:1:87/96.55%\n+9\t87aa, >M00987:61:000000000-A8R49:1:1108:14116:21837;size=72;... at 1:87:1:87/98.85%\n+10\t87aa, >M00987:61:000000000-A8R49:1:2104:12433:22290;size=57;... at 1:87:1:87/98.85%\n+11\t87aa, >M00987:61:000000000-A8R49:1:1103:20251:11748;size=53;... at 1:87:1:87/98.85%\n+12\t87aa, >M00987:61:000000000-A8R49:1:2101:22220:15964;size=48;... at 1:87:1:87/98.85%\n+13\t87aa, >M00987:61:000000000-A8R49:1:2111:13291:23425;size=45;... at 1:87:1:87/98.85%\n+14\t87aa, >M00987:61:000000000-A8R49:1:2112:12127:11404;size=35;... at 1:87:1:87/98.85%\n+15\t87aa, >M00987:61:000000000-A8R49:1:1104:7152:13973;size=33;... at 1:87:1:87/98.85%\n+16\t87aa, >M00987:61:000000000-A8R49:1:2118:15157:12148;size=32;... at 1:87:1:87/97.70%\n+17\t87aa, >M00987:61:000000000-A8R49:1:2114:14767:2871;size=31;... at 1:87:1:87/98.85%\n+18\t87aa, >M00987:61:000000000-A8R49:1:2106:23792:18380;size=31;... at 1:87:1:87/98.85%\n+19\t87aa, >M00987:61:000000000-A8R49:1:2105:19077:5826;size=30;... at 1:87:1:87/98.85%\n+20\t87aa, >M00987:61:000000000-A8R49:1:1119:21204:10429;size=27;... at 1:87:1:87/98.85%\n+21\t87aa, >M00987:61:000000000-A8R49:1:2105:14405:18256;size=26;... 
at 1:87:1:87/97.70%\n+22\t87aa, >M00987:61:000000000-A8R49:1:2113:18522:20174;size=26;... at 1:87:1:87/97.70%\n+23\t87aa, >M00987:61:000000000-A8R49:1:2103:14130:17528;size=25;... at 1:87:1:87/98.85%\n+24\t87aa, >M00987:61:000000000-A8R49:1:2112:14594:11585;size=24;... at 1:87:1:87/98.85%\n+25\t87aa, >M00987:61:000000000-A8R49:1:2107:18506:8195;size=23;... at 1:87:1:87/98.85%\n+26\t87aa, >M00987:61:000000000-A8R49:1:2104:4621:13376;size=22;... at 1:87:1:87/98.85%\n+27\t87aa, >M00987:61:000000000-A8R49:1:1119:11102:11148;size=22;... at 1:87:1:87/98.85%\n+28\t87aa, >M00987:61:000000000-A8R49:1:2112:12179:14971;size=21;... at 1:87:1:87/98.85%\n+29\t87aa, >M00987:61:000000000-A8R49:1:1109:14535:24323;size=19;... at 1:87:1:87/97.70%\n+30\t87aa, >M00987:61:000000000-A8R49:1:1104:8133:4042;size=19;... at 1:87:1:87/98.85%\n+31\t87aa, >M00987:61:000000000-A8R49:1:1102:22449:24196;size=18;... at 1:87:1:87/98.85%\n+32\t87aa, >M00987:61:000000000-A8R49:1:1106:26512:12404;size=17;... at 1:87:1:87/98.85%\n+33\t87aa, >M00987:61:000000000-A8R49:1:1116:13744:9570;size=16;... at 1:87:1:87/98.85%\n+34\t87aa, >M00987:61:000000000-A8R49:1:2106:17649:17745;size=16;... at 1:87:1:87/98.85%\n+35\t87aa, >M00987:61:000000000-A8R49:1:2104:5163:10944;size=14;... at 1:87:1:87/97.70%\n+36\t87aa, >M00987:61:000000000-A8R49:1:2111:23969:11962;size=14;... at 1:87:1:87/96.55%\n+37\t87aa, >M00987:61:000000000-A8R49:1:2104:22733:15966;size=13;... at 1:87:1:87/98.85%\n+38\t87aa, >M00987:61:000000000-A8R49:1:2105:13835:11885;size=13;... at 1:87:1:87/98.85%\n+39\t87aa, >M00987:61:000000000-A8R49:1:2102:7359:7200;size=13;... at 1:87:1:87/98.85%\n+40\t87aa, >M00987:61:000000000-A8R49:1:2106:17099:8195;size=13;... at 1:87:1:87/98.85%\n+41\t87aa, >M00987:61:000000000-A8R49:1:2108:25287:9422;size=12;... at 1:87:1:87/98.85%\n+42\t87aa, >M00987:61:000000000-A8R49:1:2110:9246:20449;size=12;... at 1:87:1:87/98.85%\n+43\t87aa, >M00987:61:000000000-A8R49:1:2103:15193:20710;size=12;... 
at 1:87:1:87/97.70%\n+44\t87aa, >M00987:61:000000000-A8R49:1:2105:19261:13066;size=11;... at 1:87:1:87/98.85%\n+45\t87aa, >M00987:61:000000'..b'A8R49:1:2112:2473:9007;size=1;... *\n+>Cluster 2941\n+0\t83aa, >M00987:61:000000000-A8R49:1:2116:9729:5106;size=1;... *\n+>Cluster 2942\n+0\t83aa, >M00987:61:000000000-A8R49:1:1101:13404:23918;size=1;... *\n+>Cluster 2943\n+0\t83aa, >M00987:61:000000000-A8R49:1:1116:3512:19977;size=1;... *\n+>Cluster 2944\n+0\t83aa, >M00987:61:000000000-A8R49:1:1104:15132:1539;size=1;... *\n+>Cluster 2945\n+0\t83aa, >M00987:61:000000000-A8R49:1:2108:9213:25248;size=1;... *\n+>Cluster 2946\n+0\t83aa, >M00987:61:000000000-A8R49:1:2101:23411:21523;size=1;... *\n+>Cluster 2947\n+0\t83aa, >M00987:61:000000000-A8R49:1:2117:21893:24100;size=1;... *\n+>Cluster 2948\n+0\t83aa, >M00987:61:000000000-A8R49:1:2108:7483:15156;size=1;... *\n+>Cluster 2949\n+0\t83aa, >M00987:61:000000000-A8R49:1:1103:11029:21123;size=1;... *\n+>Cluster 2950\n+0\t83aa, >M00987:61:000000000-A8R49:1:2106:14556:1831;size=1;... *\n+>Cluster 2951\n+0\t83aa, >M00987:61:000000000-A8R49:1:1114:10975:24681;size=1;... *\n+>Cluster 2952\n+0\t83aa, >M00987:61:000000000-A8R49:1:2119:12013:22975;size=1;... *\n+>Cluster 2953\n+0\t83aa, >M00987:61:000000000-A8R49:1:2111:12826:24678;size=1;... *\n+>Cluster 2954\n+0\t83aa, >M00987:61:000000000-A8R49:1:1117:17750:2114;size=1;... *\n+>Cluster 2955\n+0\t83aa, >M00987:61:000000000-A8R49:1:2118:18578:19285;size=1;... *\n+>Cluster 2956\n+0\t83aa, >M00987:61:000000000-A8R49:1:2109:14931:24648;size=1;... *\n+>Cluster 2957\n+0\t83aa, >M00987:61:000000000-A8R49:1:2114:9931:22922;size=1;... *\n+>Cluster 2958\n+0\t83aa, >M00987:61:000000000-A8R49:1:1111:28341:12018;size=1;... *\n+>Cluster 2959\n+0\t83aa, >M00987:61:000000000-A8R49:1:2116:23628:18794;size=1;... *\n+>Cluster 2960\n+0\t82aa, >M00987:61:000000000-A8R49:1:2116:12734:11759;size=1;... *\n+>Cluster 2961\n+0\t82aa, >M00987:61:000000000-A8R49:1:2101:5997:6049;size=1;... 
*\n+>Cluster 2962\n+0\t82aa, >M00987:61:000000000-A8R49:1:1109:8899:24137;size=1;... *\n+>Cluster 2963\n+0\t82aa, >M00987:61:000000000-A8R49:1:2108:16023:12220;size=1;... *\n+>Cluster 2964\n+0\t82aa, >M00987:61:000000000-A8R49:1:2106:14796:24050;size=1;... *\n+>Cluster 2965\n+0\t82aa, >M00987:61:000000000-A8R49:1:2108:5296:5805;size=1;... *\n+>Cluster 2966\n+0\t82aa, >M00987:61:000000000-A8R49:1:1117:27170:19520;size=1;... *\n+>Cluster 2967\n+0\t82aa, >M00987:61:000000000-A8R49:1:2107:10633:22563;size=1;... *\n+>Cluster 2968\n+0\t82aa, >M00987:61:000000000-A8R49:1:2119:17073:14915;size=1;... *\n+>Cluster 2969\n+0\t82aa, >M00987:61:000000000-A8R49:1:2107:11872:2904;size=1;... *\n+>Cluster 2970\n+0\t82aa, >M00987:61:000000000-A8R49:1:2103:23352:20721;size=1;... *\n+>Cluster 2971\n+0\t82aa, >M00987:61:000000000-A8R49:1:1118:17805:1803;size=1;... *\n+>Cluster 2972\n+0\t82aa, >M00987:61:000000000-A8R49:1:1116:3496:19994;size=1;... *\n+>Cluster 2973\n+0\t82aa, >M00987:61:000000000-A8R49:1:1103:17615:5503;size=1;... *\n+>Cluster 2974\n+0\t82aa, >M00987:61:000000000-A8R49:1:2112:25257:22012;size=1;... *\n+>Cluster 2975\n+0\t82aa, >M00987:61:000000000-A8R49:1:1115:12165:1939;size=1;... *\n+>Cluster 2976\n+0\t82aa, >M00987:61:000000000-A8R49:1:2114:24719:20822;size=1;... *\n+>Cluster 2977\n+0\t82aa, >M00987:61:000000000-A8R49:1:1113:22195:23117;size=1;... *\n+>Cluster 2978\n+0\t82aa, >M00987:61:000000000-A8R49:1:2116:5453:20144;size=1;... *\n+>Cluster 2979\n+0\t82aa, >M00987:61:000000000-A8R49:1:1110:22226:13629;size=1;... *\n+>Cluster 2980\n+0\t82aa, >M00987:61:000000000-A8R49:1:1109:14537:23295;size=1;... *\n+>Cluster 2981\n+0\t82aa, >M00987:61:000000000-A8R49:1:2115:12165:3927;size=1;... *\n+>Cluster 2982\n+0\t81aa, >M00987:61:000000000-A8R49:1:2102:28728:13623;size=1;... *\n+>Cluster 2983\n+0\t81aa, >M00987:61:000000000-A8R49:1:2110:28234:12796;size=1;... *\n+>Cluster 2984\n+0\t81aa, >M00987:61:000000000-A8R49:1:2116:11217:22852;size=1;... 
*\n+>Cluster 2985\n+0\t81aa, >M00987:61:000000000-A8R49:1:2113:14377:19027;size=1;... *\n+>Cluster 2986\n+0\t81aa, >M00987:61:000000000-A8R49:1:2108:23914:18379;size=1;... *\n+>Cluster 2987\n+0\t80aa, >M00987:61:000000000-A8R49:1:1119:29328:16362;size=1;... *\n+>Cluster 2988\n+0\t80aa, >M00987:61:000000000-A8R49:1:2118:14214:20073;size=1;... *\n'
b
diff -r 000000000000 -r 4015e9d6d277 test-data/input_mapping.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_mapping.txt Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,147084 @@\n+M00987:61:000000000-A8R49:1:1103:21293:6262;size=1515;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2106:17121:22649;size=491;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2112:17537:2141;size=366;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1110:12700:1843;size=236;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2105:13510:5899;size=171;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2113:10672:5596;size=170;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2105:13866:12140;size=140;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1117:14644:4815;size=132;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1113:22826:20271;size=118;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2111:20895:21768;size=100;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2111:6354:6629;size=96;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1110:17065:16689;size=85;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1109:16753:15539;size=84;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:20140:17213;size=78;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1108:14116:21837;size=72;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:12433:22290;size=57;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1103:20251:11748;size=53;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2101:22220:15964;size=48;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2111:13291:23425;size=45;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2112:12127:11404;size=35;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1104:7152:13973;size=33;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2118:15157:12148;size=32;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2114:14767:2871;size=31;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2106:23792:18380;size=31;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2105:19077:5826;size=30;\tamoA_arc_19F-TV1-T0-a\n+
M00987:61:000000000-A8R49:1:2105:4649:8507;size=27;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1119:21204:10429;size=27;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2105:14405:18256;size=26;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2113:18522:20174;size=26;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2103:14130:17528;size=25;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2112:14594:11585;size=24;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2107:18506:8195;size=23;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:4621:13376;size=22;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1119:11102:11148;size=22;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2112:12179:14971;size=21;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1109:14535:24323;size=19;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1104:8133:4042;size=19;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:9344:19079;size=18;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1102:22449:24196;size=18;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1106:26512:12404;size=17;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:1116:13744:9570;size=16;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2106:17649:17745;size=16;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:5163:10944;size=14;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2111:23969:11962;size=14;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2104:22733:15966;size=13;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2105:13835:11885;size=13;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2102:7359:7200;size=13;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2106:17099:8195;size=13;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2108:25287:9422;size=12;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2108:12496:10590;size=12;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2110:9246:
20449;size=12;\tamoA_arc_19F-TV1-T0-a\n+M00987:61:000000000-A8R49:1:2112:15752:9137;size=12;\tamoA_arc_19'..b'e=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2106:16892:19152;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2106:21108:21232;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2106:16718:21667;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2106:23029:20560;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2117:25572:14966;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:16363:2231;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2106:18676:19542;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:9078:3113;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:12720:15859;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:13055:11240;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:8604:24551;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:17295:4426;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:2423:13934;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:9713:10896;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:8472:10285;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:19752:20269;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:27561:12135;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:21130:14284;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:14540:13324;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1107:23816:12012;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1107:11120:12163;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:21048:6721;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1107:6385:16136;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1107:12670:11643;size=1;
\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:13726:2909;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:9999:2901;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:18201:23976;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:15345:6994;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:19996:5841;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1109:25721:9165;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:1108:7513:19680;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2117:20031:3257;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:22355:16614;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:18215:21161;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2117:9710:1408;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2116:17109:24104;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2114:13150:13983;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:18347:20349;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2112:4992:10540;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2107:5759:19748;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:13396:1451;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2116:20472:22816;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2114:18227:14998;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:10761:3270;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:10120:2591;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2112:17774:9843;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2112:8568:9382;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2114:22143:16200;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:27436:6442;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:0000000
00-A8R49:1:2112:21334:9652;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:8809:8690;size=1;\tamoA_arc_19F-TEM1-T1-d\n+M00987:61:000000000-A8R49:1:2108:20388:9247;size=1;\tamoA_arc_19F-TEM1-T1-d\n'
b
diff -r 000000000000 -r 4015e9d6d277 test-data/input_representative_sequences.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_representative_sequences.fasta Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,8974 @@\n+>M00987:61:000000000-A8R49:1:1102:17086:7758;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsishstmlavgaiyytlftgvp\n+gtttyyapiltihtwvakgacfalasp\n+>M00987:61:000000000-A8R49:1:1109:23380:21363;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiysmlftglp\n+gpaafsapfmpiypwlaneacfalgy\n+>M00987:61:000000000-A8R49:1:2116:19201:22336;size=1;\n+tthylligvvavnstlltinavdyiiytdwawtsfgvfsisqstmlavgpiyymlfstsq\n+gtvtyyapimtiytwvvkgawfalgyp\n+>M00987:61:000000000-A8R49:1:2119:28247:9459;size=1;\n+ttqylfivvvdvnstlltihageyifytdwawtsfvvfsiskstmlavraiyyllftgvt\n+gtatyyvsimtiytwvakragvalgys\n+>M00987:61:000000000-A8R49:1:2117:21466:7808;size=1;\n+tthylfivvvavtstlltinagdylfytdwewssfvvfsisqstmlavgaiyyllftgvp\n+gtatyystimpiynwvakgawlalgyp\n+>M00987:61:000000000-A8R49:1:2116:21501:16579;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvlsisqstmlavgaiyfmlftgvp\n+gtatyyatlmtittwvatgacfalgyr\n+>M00987:61:000000000-A8R49:1:2104:19736:23928;size=1;\n+tthylfivvvavnstlltikageyifytdwawasfvvfsifqstmlavgaiyfmlftgvp\n+gtdtyyatimtiytwfakgawfalvyp\n+>M00987:61:000000000-A8R49:1:1107:14852:1525;size=1;\n+tthylfivvvavnstlltinaadyifytdwactsfvvfsipqstmlpvgslytltspglp\n+gpatysatlmtiytwvakvawfaldsp\n+>M00987:61:000000000-A8R49:1:1101:11478:24325;size=1;\n+tthylfivvvavnstlltinagyyifytdsawtsfvvfsipqstmlavgaiyymlltgvp\n+gtatytatimtiytcvaqgswfafgyp\n+>M00987:61:000000000-A8R49:1:2107:28530:14570;size=1;\n+tthylfivvvvvnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+gtdtyydtmmtvytwvaqgawvalgyp\n+>M00987:61:000000000-A8R49:1:1111:9978:23565;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymllqgvp\n+gnatyyatiitiytwvatgacfslgyp\n+>M00987:61:000000000-A8R49:1:2104:10547:11369;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqssmlavgaiydllftgvp\n+gtdtyyptfktiytwvvkgalialvyp\n+>M00987:61:000000000-A8R49:1:1104:7117:22367;size=1;\n+tthylflvvvavnstlltinagdyifytdwawpsfvvfsisqstmlvvgaiyymiftgvp\n+gtatyyatimtiytllskgacfplgyp\n+>M00987:61:000000000-A8R49:1:2101:4051:9017;siz
e=1;\n+tthylfivvvavnstlltlnagdyifytdwawtsfvvfsisqspmlavaalyymlfpgvp\n+vtatyyatiitiytwfakgagfalgyp\n+>M00987:61:000000000-A8R49:1:1101:7964:16621;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgsiyymlftgvp\n+gtapyyptiitiyiwvatgawfdlvy\n+>M00987:61:000000000-A8R49:1:2102:28728:13623;size=1;\n+tnhdlfivvvavnstlltikagdyifytdwartsfvvfyifqstvltvgeiyymfftgvp\n+gnatyyatimtiytrvdkgaw\n+>M00987:61:000000000-A8R49:1:2106:10291:4215;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+vtatyyatiitistlvatgewfalvys\n+>M00987:61:000000000-A8R49:1:2113:11175:8813;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavglyymfftgvpg\n+tatyyaplmsiyiwvakgawfalgyiy\n+>M00987:61:000000000-A8R49:1:1117:6426:4737;size=1;\n+tthylfivvvavnstlltinagyyifytdwawtsfvvfsisqstmlvvgtihymlftgvp\n+vtspyyatimtlypwvatgawfalsyp\n+>M00987:61:000000000-A8R49:1:2118:10079:20240;size=1;\n+tthyifivivavnnklftinagdyifytdwawssfvvfsisqstmlavgaiyymlltgvp\n+giatyyatimtiytwvakvalialghp\n+>M00987:61:000000000-A8R49:1:1102:24010:5941;size=1;\n+tthylftvvvsvnstlltinagdyifytdwawtsfvvssisqstmlvfgaiyymlftgvp\n+gtatyyapiltiytwvakgacfasgyp\n+>M00987:61:000000000-A8R49:1:1106:7255:3687;size=1;\n+tthylfivlvavnstlltinagdyifytdwawtsfvvfsisqstmlsvgaiyymlftgip\n+gtatysatlmtiytwvakvacftlgyp\n+>M00987:61:000000000-A8R49:1:2109:15442:11999;size=1;\n+tthylfivvvainstlltintgeyifytdwawtsfvvfsisqstmiavgaisymqftglp\n+gtatyyatimtlytmvakgawfalvyp\n+>M00987:61:000000000-A8R49:1:1117:26225:5333;size=1;\n+tkhylfivvvavnstlltitagdyifytdwawtsfvvfsisqstmlavgtiyymmftgvt\n+gtatyyatimtiytlvakdawfafgyp\n+>M00987:61:000000000-A8R49:1:1118:18155:5055;size=1;\n+tthylfivvvavnstlltinagdyifytdwmwssfvvfsisqstmlvagaisymlftgfp\n+gtatyyatlmpiytwvakgewfslgyt\n+>M00987:61:000000000-A8R49:1:1116:16023:4869;size=1;\n+tthylfivvvavhrtlltinagdyifytdwawtsfvvfsisqsrmvvggeiycmlvtgvp\n+gtatdyatsitiytwvvkaqwfalg\n+>M00987:61:000000000-A8R49:1:2117:20769:1396;size=1;\n+tthylfivvvavnstlltinagdyifytdwiwssfvvfsisqhtmlvvgaiyymlftrvt\n+gtatyydtlmtvyt
wvdkgawfals\n+>M00987:61:000000000-A8R49:1:1114:23545:10945;size=1;\n+tthylfivvvavns'..b'115:21048:4626;size=1;\n+tthysfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyyllltgvp\n+gtttyyapimtiytwvaqgawfpfg\n+>M00987:61:000000000-A8R49:1:2112:19813:2829;size=1;\n+tthyifivvvavnstlltikagdyifytdwawtsfvvfslyqsnmlvvsklyymlftglt\n+giatyyppfmiittwlangpwfal\n+>M00987:61:000000000-A8R49:1:2105:8580:23098;size=1;\n+tthylfivvvavnsrlltinegdyifctdwawtsfvvfsvsqstmvvvgaiyymlltgvp\n+gtatyyatistiytgvakgalfslg\n+>M00987:61:000000000-A8R49:1:1111:3394:6882;size=1;\n+tthnlfivvvavnstmitinagdyifytdwawtsfvvfsisqstmlvvlaiyymlftevp\n+gtakycaaimtiytwvdkgaccalgyp\n+>M00987:61:000000000-A8R49:1:2109:9711:17307;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqstlfvvgafysllstgfp\n+etetyfatirtiyiwvpkgacfalesq\n+>M00987:61:000000000-A8R49:1:1116:16013:24938;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqctmlavgaisymlftgvp\n+gtatysatimtistlvakgawlpfghh\n+>M00987:61:000000000-A8R49:1:2112:28512:13900;size=1;\n+tthylfivvvavnstlltinageyifytdwawtsfvvfsipqstmvavgaiyymlftgvp\n+gtatyndtfmiiytwvykgacvgcgy\n+>M00987:61:000000000-A8R49:1:2113:26904:19977;size=1;\n+tthylfivvvalnstlltinagdyifytdwewtsfvvssisqstmlvvgaiyymlltgvp\n+gtesyyatiltiytwvvngvelalgy\n+>M00987:61:000000000-A8R49:1:2103:17745:20405;size=1;\n+tthylfivvvavdstlltinagdylfytdwawtsfavfsisqskmlvvgaiyyvlfpgva\n+gtdtyyatimtiytcvakgawfalg\n+>M00987:61:000000000-A8R49:1:2115:12165:3927;size=1;\n+tthylfivvvavnstqltinagdyifytdwawtsfvvfsishstmpavgaryymlftgvp\n+gtatsyatimiiytwlakgawl\n+>M00987:61:000000000-A8R49:1:2105:26676:5925;size=1;\n+tthylfifvfavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyyilftgvp\n+gtapyystfrtiytlvakgpwfal\n+>M00987:61:000000000-A8R49:1:2116:23628:18794;size=1;\n+ttnylfivvvavnstlltinagdylfytdwawtsfvvfpisqstmlavgaiyymlltgdp\n+gtatynatiltmypwvatcawfa\n+>M00987:61:000000000-A8R49:1:2117:14000:14614;size=1;\n+tthylfivvvavnstlltlnagdyifytdtawtlfvvfsisqstmlvvgaiyyvllpgvp\n+vtatyyattmtiymwvakgawfalgy\n+>M00987:61:000000000-A8R49:1:2102:26
452:19816;size=1;\n+tthylffvvvavtstlltinlgdyifyidwawssfvvfsvlqstmlvvgsiysmfftgvp\n+gtstyyatimtiytwfakgawfalgs\n+>M00987:61:000000000-A8R49:1:2115:5084:11658;size=1;\n+ttpylfivvvavnrtlrtiiagdyifytdwawtslvvfsisqstmlavaaiyymlstgvp\n+vtatyyatiitiytwvakgawfalg\n+>M00987:61:000000000-A8R49:1:1109:21731:9118;size=1;\n+tthylfivlvavsstlltiyagdyifytdwawtsflvfsisqstmlvvgaiyymlftgvp\n+gtatsyatimtiptslakaasfsla\n+>M00987:61:000000000-A8R49:1:2103:10521:2635;size=1;\n+ttqylfivvvavnstlltinagdyifytdrmwssfvvvsisqstmlvvgaiyymlftgvp\n+gtatyhatimtvytcvakgawlvlc\n+>M00987:61:000000000-A8R49:1:1103:6439:19498;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtssevfsisqskrlvfgaffylfftglp\n+gpatfyaptmpsypwlpkglwfpley\n+>M00987:61:000000000-A8R49:1:2110:4729:13826;size=1;\n+tthylfivvvavnstlltsnagdyifytdwawtsfvvlsisqskmlevgaiyymlftgvp\n+gtatayatiltiytlvdqgawfalgy\n+>M00987:61:000000000-A8R49:1:2112:4552:11368;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgplnsmpfpgvp\n+gtvtfyvtimqiytgvskgewfalgy\n+>M00987:61:000000000-A8R49:1:1101:25030:4052;size=1;\n+ttnnlfivvvaanstlitinagdyifytdwmwssfvvfsitqstmlvvgaiyymlltgvp\n+gtatyydtimtiytwvakgawfalgy\n+>M00987:61:000000000-A8R49:1:2111:4121:11479;size=1;\n+tthdlfivvvavnsplltinagdyilytdwactsfvvfslsqstmlvvgaiyymlftgvl\n+gtatyyatimtlstwvangsclvlgs\n+>M00987:61:000000000-A8R49:1:2104:2792:17942;size=1;\n+tthylfivvvavnstlitinagdysfytdwmwssfvvfsisqstmvvvgaiyymlftggp\n+gtatyyatimtistwvakgagcalgy\n+>M00987:61:000000000-A8R49:1:2102:28258:17985;size=1;\n+tthylfivvvavnstlltikagdyifytdwawtsfvvfsisqstmlvvgeiyyllftgvp\n+gtatdyapfitiypcvakvawfvlvyp\n+>M00987:61:000000000-A8R49:1:1102:11817:5253;size=1;\n+tthylflvvvavnsklltinagdsilytdwawtsfvvfsisqstmlvvgaiyymiftgvt\n+gtgtyyatimtiypwvakgawfalgyp\n+>M00987:61:000000000-A8R49:1:1110:3634:12171;size=1;\n+tthylfivvvavnstlltinagdyilytdwawtsfvvlsisqstmlavgaidyvlftevp\n+gtaayydtsmtiysgvakgdwlalggp\n+>M00987:61:000000000-A8R49:1:2112:21595:18310;size=1;\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyympftgvp\n+gpat
yfspfmttftwvvkgawfalgfp\n+>M00987:61:000000000-A8R49:1:2112:8568:9382;size=1;\n+pthylfivvvavnstlltinagdyifytdwawtsflvfsisqstmlvvgaiyymlitgvp\n+gtatyyatfmtiytllakdsllslgyp\n'
b
diff -r 000000000000 -r 4015e9d6d277 test-data/rename_representative_sequences_output.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rename_representative_sequences_output.fasta Tue Apr 26 08:55:33 2016 -0400
b
b'@@ -0,0 +1,8974 @@\n+>Cluster_30\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsishstmlavgaiyytlftgvp\n+gtttyyapiltihtwvakgacfalasp\n+>Cluster_2051\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiysmlftglp\n+gpaafsapfmpiypwlaneacfalgy\n+>Cluster_31\n+tthylligvvavnstlltinavdyiiytdwawtsfgvfsisqstmlavgpiyymlfstsq\n+gtvtyyapimtiytwvvkgawfalgyp\n+>Cluster_32\n+ttqylfivvvdvnstlltihageyifytdwawtsfvvfsiskstmlavraiyyllftgvt\n+gtatyyvsimtiytwvakragvalgys\n+>Cluster_33\n+tthylfivvvavtstlltinagdylfytdwewssfvvfsisqstmlavgaiyyllftgvp\n+gtatyystimpiynwvakgawlalgyp\n+>Cluster_34\n+tthylfivvvavnstlltinagdyifytdwawtsfvvlsisqstmlavgaiyfmlftgvp\n+gtatyyatlmtittwvatgacfalgyr\n+>Cluster_35\n+tthylfivvvavnstlltikageyifytdwawasfvvfsifqstmlavgaiyfmlftgvp\n+gtdtyyatimtiytwfakgawfalvyp\n+>Cluster_36\n+tthylfivvvavnstlltinaadyifytdwactsfvvfsipqstmlpvgslytltspglp\n+gpatysatlmtiytwvakvawfaldsp\n+>Cluster_37\n+tthylfivvvavnstlltinagyyifytdsawtsfvvfsipqstmlavgaiyymlltgvp\n+gtatytatimtiytcvaqgswfafgyp\n+>Cluster_38\n+tthylfivvvvvnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+gtdtyydtmmtvytwvaqgawvalgyp\n+>Cluster_39\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymllqgvp\n+gnatyyatiitiytwvatgacfslgyp\n+>Cluster_40\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqssmlavgaiydllftgvp\n+gtdtyyptfktiytwvvkgalialvyp\n+>Cluster_41\n+tthylflvvvavnstlltinagdyifytdwawpsfvvfsisqstmlvvgaiyymiftgvp\n+gtatyyatimtiytllskgacfplgyp\n+>Cluster_42\n+tthylfivvvavnstlltlnagdyifytdwawtsfvvfsisqspmlavaalyymlfpgvp\n+vtatyyatiitiytwfakgagfalgyp\n+>Cluster_2052\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgsiyymlftgvp\n+gtapyyptiitiyiwvatgawfdlvy\n+>Cluster_2982\n+tnhdlfivvvavnstlltikagdyifytdwartsfvvfyifqstvltvgeiyymfftgvp\n+gnatyyatimtiytrvdkgaw\n+>Cluster_43\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyymlftgvp\n+vtatyyatiitistlvatgewfalvys\n+>Cluster_44\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavglyymfftgvpg\n+tatyyaplmsiyiwvakgawfalgyiy\n+>Cluster_45\n+tthylfivvvavnstlltinagyyifytdwawtsfvvfs
isqstmlvvgtihymlftgvp\n+vtspyyatimtlypwvatgawfalsyp\n+>Cluster_46\n+tthyifivivavnnklftinagdyifytdwawssfvvfsisqstmlavgaiyymlltgvp\n+giatyyatimtiytwvakvalialghp\n+>Cluster_47\n+tthylftvvvsvnstlltinagdyifytdwawtsfvvssisqstmlvfgaiyymlftgvp\n+gtatyyapiltiytwvakgacfasgyp\n+>Cluster_48\n+tthylfivlvavnstlltinagdyifytdwawtsfvvfsisqstmlsvgaiyymlftgip\n+gtatysatlmtiytwvakvacftlgyp\n+>Cluster_49\n+tthylfivvvainstlltintgeyifytdwawtsfvvfsisqstmiavgaisymqftglp\n+gtatyyatimtlytmvakgawfalvyp\n+>Cluster_50\n+tkhylfivvvavnstlltitagdyifytdwawtsfvvfsisqstmlavgtiyymmftgvt\n+gtatyyatimtiytlvakdawfafgyp\n+>Cluster_51\n+tthylfivvvavnstlltinagdyifytdwmwssfvvfsisqstmlvagaisymlftgfp\n+gtatyyatlmpiytwvakgewfslgyt\n+>Cluster_2630\n+tthylfivvvavhrtlltinagdyifytdwawtsfvvfsisqsrmvvggeiycmlvtgvp\n+gtatdyatsitiytwvvkaqwfalg\n+>Cluster_2631\n+tthylfivvvavnstlltinagdyifytdwiwssfvvfsisqhtmlvvgaiyymlftrvt\n+gtatyydtlmtvytwvdkgawfals\n+>Cluster_52\n+tthylfivvvavnsplltinagdyifytdwmwpsfvvfslfqstmlavaaiyymlftgvp\n+gtatyyspimtiytwvtkgawfalgyr\n+>Cluster_53\n+tthdlfivvvavnstlltikagdyifytdwawtsfgvfsisqstmltvgaiyymlfkvvp\n+gtatyyatimtiytlvakgawfafgyh\n+>Cluster_54\n+tthylfivvvavnstlltinagdyifytdwartsfvvfsisqstllvvgaiyymlftgvp\n+gtatyyenimtvytwvangagfafgyp\n+>Cluster_2053\n+tthywlivvvavnstlitinagdyifytdwawtsfvvfsisqstmiavgaiyymmftgvp\n+gtatyyatlmtiytwvakgagfalgy\n+>Cluster_2054\n+tthylfivvvavnntlltinagdyifytdwawtsfvvfsisqstmfavgaiyymlftgvp\n+etatysasimpiytwvakgawfafgy\n+>Cluster_2055\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyymlftgfl\n+gpppiyptslqvntgvakvhcfsley\n+>Cluster_2056\n+tthylfivvvavnstlltinaenyifytdwewtsfvvfsisqstmlvvgaiyymlstglp\n+gtakyyapfmtiytgvakgawfalgy\n+>Cluster_2057\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsvsqstrlvvgaidymlftgvp\n+gmatyyatimtintrvakgacfalvh\n+>Cluster_55\n+tthylfivvvavnstlltinagdyifytdwxwtiivvfliskltkfvvcaihfllfngvp\n+gtannyatiltnhtwvakgawfalgyp\n+>Cluster_56\n+tthylfivvvavnstlltinagdyifytdwawtsflvfsisqstmlvvgaiyymlftgvp\n+gpatyyspfitistwvpkgawfaleyp\n+>Cluster_57\n+t
thylfigvvavnstlltinagdcifytdwgwtsfvvfsisqstmlavgavyyllfrgvp\n+gtatyyatlmtiytwvakaawfalvyt\n+>Cluster_58\n+tthylfigvva'..b'iiytwvakgawfalgyh\n+>Cluster_2040\n+tthylfivvvavnstlltinagdyifytdwmwssfvvfsisqstmlvvgaiyfmlftgfp\n+gtatyyatimtistwvakgalfafsyl\n+>Cluster_2041\n+tthylfivvvavtrtfltiipatfifktdwvwlsllffsfwnsrwlvlgqisyfpflxip\n+wtaaylsynlaifnwvgkgsmfsfgfp\n+>Cluster_2614\n+tthylfivvvavnstlltlnagdyifytdwawtsfvvfsishstmlvvaaiyymlftavp\n+vtvtyydtimtlyigvapgawfalgy\n+>Cluster_2615\n+tthylfivvvavnstlltinagdyifytdlawtsflvfsisqstmlavgaiyyvlltgvp\n+gtatsyptimtiyllvakgalvalgy\n+>Cluster_2616\n+tthylfivvvavnstllainpgdyifytewawtsfvvfsvskstmlvvgaiyymlftgvp\n+gtatysatlmtistwvakgagfalgy\n+>Cluster_2617\n+tthylfivvvavnstmltinagdyifctdcawtsfvvfsisqstmlvvgaiyymlftgvp\n+gtatyystimtiytwvakaalialay\n+>Cluster_2618\n+ttpylfivvvavnstlltinagdyifytdwvwtsfvvfsisqstmlvvgaiyymlftevp\n+gtvtyyativtiytwvaevvwfalry\n+>Cluster_2619\n+pthylfivvvavnsplltihagdyifytdwactsfvvfsisqstmlvvgaiyymlftvfp\n+gtatyyatimpiytvvskfacfalgy\n+>Cluster_2042\n+tthylfivvvavnstlltinagdyifytdwmwtsfvvfsisqstmlavgaisymlftvfp\n+gtatyyatimtiytwvvkggwfelgnr\n+>Cluster_2958\n+tphylfivvvavtstlitiiegdyifytdsawtsfvvfsisistmlvvvsilymlltrvp\n+gtatyyattltfytwvpigswla\n+>Cluster_2791\n+tthysfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlavgaiyyllltgvp\n+gtttyyapimtiytwvaqgawfpfg\n+>Cluster_2905\n+tthyifivvvavnstlltikagdyifytdwawtsfvvfslyqsnmlvvsklyymlftglt\n+giatyyppfmiittwlangpwfal\n+>Cluster_2792\n+tthylfivvvavnsrlltinegdyifctdwawtsfvvfsvsqstmvvvgaiyymlltgvp\n+gtatyyatistiytgvakgalfslg\n+>Cluster_2043\n+tthnlfivvvavnstmitinagdyifytdwawtsfvvfsisqstmlvvlaiyymlftevp\n+gtakycaaimtiytwvdkgaccalgyp\n+>Cluster_2044\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsiyqstlfvvgafysllstgfp\n+etetyfatirtiyiwvpkgacfalesq\n+>Cluster_2045\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqctmlavgaisymlftgvp\n+gtatysatimtistlvakgawlpfghh\n+>Cluster_2620\n+tthylfivvvavnstlltinageyifytdwawtsfvvfsipqstmvavgaiyymlftgvp\n+gtatyndtfmiiytwvykgacvgcgy\n+>Cluster_2621\n+tthylfi
vvvalnstlltinagdyifytdwewtsfvvssisqstmlvvgaiyymlltgvp\n+gtesyyatiltiytwvvngvelalgy\n+>Cluster_2793\n+tthylfivvvavdstlltinagdylfytdwawtsfavfsisqskmlvvgaiyyvlfpgva\n+gtdtyyatimtiytcvakgawfalg\n+>Cluster_2981\n+tthylfivvvavnstqltinagdyifytdwawtsfvvfsishstmpavgaryymlftgvp\n+gtatsyatimiiytwlakgawl\n+>Cluster_2906\n+tthylfifvfavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyyilftgvp\n+gtapyystfrtiytlvakgpwfal\n+>Cluster_2959\n+ttnylfivvvavnstlltinagdylfytdwawtsfvvfpisqstmlavgaiyymlltgdp\n+gtatynatiltmypwvatcawfa\n+>Cluster_2622\n+tthylfivvvavnstlltlnagdyifytdtawtlfvvfsisqstmlvvgaiyyvllpgvp\n+vtatyyattmtiymwvakgawfalgy\n+>Cluster_2623\n+tthylffvvvavtstlltinlgdyifyidwawssfvvfsvlqstmlvvgsiysmfftgvp\n+gtstyyatimtiytwfakgawfalgs\n+>Cluster_2794\n+ttpylfivvvavnrtlrtiiagdyifytdwawtslvvfsisqstmlavaaiyymlstgvp\n+vtatyyatiitiytwvakgawfalg\n+>Cluster_2795\n+tthylfivlvavsstlltiyagdyifytdwawtsflvfsisqstmlvvgaiyymlftgvp\n+gtatsyatimtiptslakaasfsla\n+>Cluster_2796\n+ttqylfivvvavnstlltinagdyifytdrmwssfvvvsisqstmlvvgaiyymlftgvp\n+gtatyhatimtvytcvakgawlvlc\n+>Cluster_2624\n+tthylfivvvavnstlltinagdyifytdwawtssevfsisqskrlvfgaffylfftglp\n+gpatfyaptmpsypwlpkglwfpley\n+>Cluster_2625\n+tthylfivvvavnstlltsnagdyifytdwawtsfvvlsisqskmlevgaiyymlftgvp\n+gtatayatiltiytlvdqgawfalgy\n+>Cluster_2626\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgplnsmpfpgvp\n+gtvtfyvtimqiytgvskgewfalgy\n+>Cluster_2627\n+ttnnlfivvvaanstlitinagdyifytdwmwssfvvfsitqstmlvvgaiyymlltgvp\n+gtatyydtimtiytwvakgawfalgy\n+>Cluster_2628\n+tthdlfivvvavnsplltinagdyilytdwactsfvvfslsqstmlvvgaiyymlftgvl\n+gtatyyatimtlstwvangsclvlgs\n+>Cluster_2629\n+tthylfivvvavnstlitinagdysfytdwmwssfvvfsisqstmvvvgaiyymlftggp\n+gtatyyatimtistwvakgagcalgy\n+>Cluster_2046\n+tthylfivvvavnstlltikagdyifytdwawtsfvvfsisqstmlvvgeiyyllftgvp\n+gtatdyapfitiypcvakvawfvlvyp\n+>Cluster_2047\n+tthylflvvvavnsklltinagdsilytdwawtsfvvfsisqstmlvvgaiyymiftgvt\n+gtgtyyatimtiypwvakgawfalgyp\n+>Cluster_2048\n+tthylfivvvavnstlltinagdyilytdwawtsfvvlsisqstmlavgaidyvlftevp\n+gtaayydtsmt
iysgvakgdwlalggp\n+>Cluster_2049\n+tthylfivvvavnstlltinagdyifytdwawtsfvvfsisqstmlvvgaiyympftgvp\n+gpatyfspfmttftwvvkgawfalgfp\n+>Cluster_2050\n+pthylfivvvavnstlltinagdyifytdwawtsflvfsisqstmlvvgaiyymlitgvp\n+gtatyyatfmtiytllakdsllslgyp\n'