changeset 11:c0c9d19bc7b2 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 746497a64b955f6b9afc1944d1c1d8d877e53267
author rnateam
date Tue, 18 Jul 2017 01:43:49 -0400
parents 16bcaef3dc1e
children 8a1786cdcf95
files preprocessing.xml splitSHAPE.py splitStockholm.py test-data/sample_4_alignment_data_split.stk test-data/sample_4_all.stk test-data/sample_4_representatives.fa
diffstat 6 files changed, 1005 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/preprocessing.xml	Thu Jun 01 12:11:37 2017 -0400
+++ b/preprocessing.xml	Tue Jul 18 01:43:49 2017 -0400
@@ -1,7 +1,8 @@
-<tool id="preproc" name="Preprocessing" version="0.2">
+<tool id="preproc" name="Preprocessing" version="0.3">
   <requirements>
-    <requirement type="package" version="0.1.12">graphclust-wrappers</requirement>
+    <requirement type="package" version="0.3.1">graphclust-wrappers</requirement>
     <requirement type="package" version="3.0">zip</requirement>
+    <requirement type="package" version="1.70">biopython</requirement>
 
   </requirements>
     <stdio>
@@ -19,13 +20,22 @@
             &&
             python '$__tool_directory__/splitSHAPE.py' 
                 '$SHAPEdata'
-                $max_length
+                
         #end if
+
+        #if $AlignmentData:
+            &&
+            python '$__tool_directory__/splitStockholm.py' 
+                '$AlignmentData'
+                
+        #end if
+             
 ]]>
 	</command>
     <inputs>
         <param type="data" name="fastaFile" format="fasta" />
         <param type="data" name="SHAPEdata" format="txt" optional="true" label="SHAPE data"/>
+        <param type="data" name="AlignmentData" format="stockholm" optional="true" label="Alignments file"/>
         <param name="max_length" type="integer" value="10000" size="5" label="window size"/>
         <param name="in_winShift" type="integer" value="100" size="5" label="window shift in percent"/>
         <param name="min_seq_length" type="integer" value="5" size="5" label="minimum sequence length"/>
@@ -36,7 +46,8 @@
         <data name="data.names" format="txt" from_work_dir="FASTA/data.names" label="data.names"/>
         <data name="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan" label="data.fasta.scan"/>
         <data name="FASTA" format="zip" from_work_dir="FASTA.zip" label="FASTA.ZIP"/>
-        <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE data splited"/>
+        <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE.data.split"/>
+        <data name="alignment_data_split" format="stockholm" from_work_dir="alignment_data_split.stk" label="alignments.data.stk"/>
     </outputs>
     <tests>
         <test>
@@ -57,6 +68,14 @@
             <param name="in_winShift" value="50"/>
             <param name="min_seq_length" value="5"/>
             <output name="shape_data_split" file="sample_3_shape_data_split.react" />
+        </test> 
+        <test>
+            <param name="fastaFile" value="sample_4_representatives.fa"/>
+            <param name="AlignmentData" value="sample_4_all.stk"/>
+            <param name="max_length" value="50"/>
+            <param name="in_winShift" value="50"/>
+            <param name="min_seq_length" value="5"/>
+            <output name="alignment_data_split" file="sample_4_alignment_data_split.stk" />
         </test>        
     </tests>
     <help>
--- a/splitSHAPE.py	Thu Jun 01 12:11:37 2017 -0400
+++ b/splitSHAPE.py	Tue Jul 18 01:43:49 2017 -0400
@@ -3,7 +3,6 @@
 import sys
 
 shape_file = sys.argv[1]
-win_size = int(sys.argv[2])
 
 pattern = re.compile("^>.*$")
 toWrite = ""
@@ -18,9 +17,9 @@
 name_file = "FASTA/data.names"
 array_all_chunks = []
 with open(name_file, 'r') as f:
-    content = f.read()
-    lines = content.split('\n')[:-1]
-    for line in lines:
+    for line in f:
+        if len(line.strip()) == 0:
+            continue
         seq_id.append(int(line.split()[0]))
         seq_string.append(line.split()[1])
         orig_id_srt = line.split()[3]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/splitStockholm.py	Tue Jul 18 01:43:49 2017 -0400
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+########
+# This script reads multiple alignments merged into a single Stockholm file
+# and splits the alignment blocks according to the data.names table.
+# The first sequence of each alignment is assumed to match the data.names table entries.
+# Author: M. Miladi
+########
+import os
+import re
+import sys
+
+from Bio import AlignIO, SeqIO
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+stk_file = sys.argv[1]
+print ("Parsing and splitting stk file:{}".format(stk_file))
+target_f = "alignment_data_split.stk"
+pattern = re.compile("^>.*$")
+toWriteID = ""
+
+count_for_id = 1
+seq_counter = 0
+new_id = ""
+
+seq_id = []
+seq_string = []
+orig_id = []
+name_file = "FASTA/data.names"
+array_all_chunks = []
+with open(name_file, 'r') as f:
+    for line in f:
+        if len(line.strip()) == 0:
+            continue
+        seq_id.append(int(line.split()[0]))
+        seq_string.append(line.split()[1])
+        orig_id_srt = line.split()[3]
+        orig_id_srt = orig_id_srt.rsplit('_',1)[0]
+        orig_id.append(orig_id_srt)
+
+
+
+with open(stk_file) as stk_in:
+    alignments = AlignIO.parse(stk_in, "stockholm")#, alphabet=IUPAC.ambiguous_rna)  
+    alignments_dic = {(a[0].id):a for a in alignments}
+
+
+regx_gaps = '[-.~_]'  # regex character class of the valid gap symbols (not used below)
+str_gaps = '-.~_'  # characters counted as gaps in the first sequence of each alignment
+
+
+chunks = []
+with open(target_f, 'w') as out_stk_handle:
+    for i in range(len(orig_id)):
+        
+        #----------------------
+        # We need to map ungapped positions of the chunks to gapped positions of the first sequence
+        gap_count = 0
+        ungap_ind = 0
+        dic_gap_counts = dict()
+        cur_alignment = alignments_dic[orig_id[i]]
+        for c in cur_alignment[0].seq:
+            #print ungap_ind
+            if c in str_gaps:
+                gap_count += 1
+            else:
+                dic_gap_counts[ungap_ind] = gap_count
+                ungap_ind += 1
+        ID =  str(seq_id[i]) + " " + seq_string[i] 
+        chunks = re.findall(r'\d+', seq_string[i])
+        print (ID,chunks)
+
+        index_start, index_end =int(chunks[1])-1, int(chunks[2])-1
+        subalign = cur_alignment[:, index_start + dic_gap_counts[index_start]:
+                           index_end+dic_gap_counts[index_end]+1]
+        
+        #----------------------
+        # BioPython does not handle the GF ID entry for alignments,
+        # so we add the entry manually as the second line
+        siotmp = StringIO()
+        AlignIO.write(subalign, siotmp, format="stockholm")
+        stk_lines = siotmp.getvalue().split('\n')
+        out_stk_handle.write('{}\n'.format(stk_lines[0]))
+        out_stk_handle.write('#=GF ID {}\n'.format(ID))
+        out_stk_handle.writelines('\n'.join(stk_lines[1:]))
+        #print out_stk_handle.getvalue()
+
+        
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_4_alignment_data_split.stk	Tue Jul 18 01:43:49 2017 -0400
@@ -0,0 +1,798 @@
+# STOCKHOLM 1.0
+#=GF ID 1 SEQ1#1#50#+
+#=GF SQ 5
+ath1 TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACG
+#=GS ath1 AC ath1
+#=GS ath1 DE ath1
+bol1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACG
+#=GS bol1 AC bol1
+#=GS bol1 DE bol1
+bna1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACG
+#=GS bna1 AC bna1
+#=GS bna1 DE bna1
+bra1 TTACTCGGAAGTTGTCAACTAGGGAACCGCGAGGTTTTGTCGCCTTGACG
+#=GS bra1 AC bra1
+#=GS bra1 DE bra1
+aly1 TTACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTATCGTCTTCACA
+#=GS aly1 AC aly1
+#=GS aly1 DE aly1
+//
+# STOCKHOLM 1.0
+#=GF ID 2 SEQ1#26#75#+
+#=GF SQ 5
+ath1 AACTTGAGGTTTTGTCGTTTCCACG---------GCTGTCGTAGACGGTGGCAGCTGCT
+#=GS ath1 AC ath1
+#=GS ath1 DE ath1
+bol1 ACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGTGGCACG-
+#=GS bol1 AC bol1
+#=GS bol1 DE bol1
+bna1 ACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGCGGCACG-
+#=GS bna1 AC bna1
+#=GS bna1 DE bna1
+bra1 ACCGCGAGGTTTTGTCGCCTTGACGGTCTTCAACACCGTCGTCGATGGT---GGCACG-
+#=GS bra1 AC bra1
+#=GS bra1 DE bra1
+aly1 AACTTGAGGTTTTATCGTCTTCACATCTCTCACCGCCGCCGGAGACGGTGGCTGCTGCT
+#=GS aly1 AC aly1
+#=GS aly1 DE aly1
+//
+# STOCKHOLM 1.0
+#=GF ID 3 SEQ1#51#100#+
+#=GF SQ 5
+ath1 GCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTA
+#=GS ath1 AC ath1
+#=GS ath1 DE ath1
+bol1 ACCGCTGTTGATGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTA
+#=GS bol1 AC bol1
+#=GS bol1 DE bol1
+bna1 ACCGCTGTTGATGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTA
+#=GS bna1 AC bna1
+#=GS bna1 DE bna1
+bra1 ACCGTCGTCGATGGT---GGCACG---------------TGATTATAGTA
+#=GS bra1 AC bra1
+#=GS bra1 DE bra1
+aly1 GCCGCCGGAGACGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTA
+#=GS aly1 AC aly1
+#=GS aly1 DE aly1
+//
+# STOCKHOLM 1.0
+#=GF ID 4 SEQ1#62#111#+
+#=GF SQ 5
+ath1 CGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT
+#=GS ath1 AC ath1
+#=GS ath1 DE ath1
+bol1 TGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTAAGCTGTCAAGT
+#=GS bol1 AC bol1
+#=GS bol1 DE bol1
+bna1 TGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTAAGCTGTCAAGT
+#=GS bna1 AC bna1
+#=GS bna1 DE bna1
+bra1 TGGT---GGCACG---------------TGATTATAGTATGCGGTCAAGT
+#=GS bra1 AC bra1
+#=GS bra1 DE bra1
+aly1 CGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTAGGCGGAGAAGT
+#=GS aly1 AC aly1
+#=GS aly1 DE aly1
+//
+# STOCKHOLM 1.0
+#=GF ID 5 SEQ2#1#50#+
+#=GF SQ 5
+ath2 GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCA
+#=GS ath2 AC ath2
+#=GS ath2 DE ath2
+bol2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCA
+#=GS bol2 AC bol2
+#=GS bol2 DE bol2
+bna2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCA
+#=GS bna2 AC bna2
+#=GS bna2 DE bna2
+bra2 AGCAGAGAACGGAACCGGTAAAATCGAACCGGTTGAACCGGAGCTGACCA
+#=GS bra2 AC bra2
+#=GS bra2 DE bra2
+aly2 GGTCGAGAAAGGAACCGGCAAAATCGAACCGGAAACATTCGAGCTAACCA
+#=GS aly2 AC aly2
+#=GS aly2 DE aly2
+//
+# STOCKHOLM 1.0
+#=GF ID 6 SEQ2#26#75#+
+#=GF SQ 5
+ath2 GAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAG
+#=GS ath2 AC ath2
+#=GS ath2 DE ath2
+bol2 GAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAG
+#=GS bol2 AC bol2
+#=GS bol2 DE bol2
+bna2 GAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAG
+#=GS bna2 AC bna2
+#=GS bna2 DE bna2
+bra2 GAACCGGTTGAACCGGAGCTGACCACGTTCTGCATAAACTGGTACAAAAG
+#=GS bra2 AC bra2
+#=GS bra2 DE bra2
+aly2 GAACCGGAAACATTCGAGCTAACCACGTTTTGCATAAACTGGTACATAAG
+#=GS aly2 AC aly2
+#=GS aly2 DE aly2
+//
+# STOCKHOLM 1.0
+#=GF ID 7 SEQ2#51#100#+
+#=GF SQ 5
+ath2 CGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCC
+#=GS ath2 AC ath2
+#=GS ath2 DE ath2
+bol2 CGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCC
+#=GS bol2 AC bol2
+#=GS bol2 DE bol2
+bna2 CGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAGCGCC
+#=GS bna2 AC bna2
+#=GS bna2 DE bna2
+bra2 CGTTCTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCC
+#=GS bra2 AC bra2
+#=GS bra2 DE bra2
+aly2 CGTTTTGCATAAACTGGTACATAAGCAAAACGTCACCGTTAACCAAAGCC
+#=GS aly2 AC aly2
+#=GS aly2 DE aly2
+//
+# STOCKHOLM 1.0
+#=GF ID 8 SEQ2#71#120#+
+#=GF SQ 5
+ath2 ATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG
+#=GS ath2 AC ath2
+#=GS ath2 DE ath2
+bol2 AAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG
+#=GS bol2 AC bol2
+#=GS bol2 DE bol2
+bna2 AAAAGCAAGACATCGCCGTTCACCAGCGCCATGTCTTTAAACCGGTCTCG
+#=GS bna2 AC bna2
+#=GS bna2 DE bna2
+bra2 AAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG
+#=GS bra2 AC bra2
+#=GS bra2 DE bra2
+aly2 ATAAGCAAAACGTCACCGTTAACCAAAGCCACGTCCTTAAACCGGTCTCG
+#=GS aly2 AC aly2
+#=GS aly2 DE aly2
+//
+# STOCKHOLM 1.0
+#=GF ID 9 SEQ3#1#50#+
+#=GF SQ 5
+ath3 CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGCGCT
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCT
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGAGCGAGCT
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 CTGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCT
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGAGCT
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 10 SEQ3#26#75#+
+#=GF SQ 5
+ath3 TCCAACTAGGTCGTACGGACGCGCTTTGTTTGGTGGCCGAAATCCGAGAA
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 CCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGA
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 CCCGACCAGGTCGTAAGAGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGA
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 CCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGA
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 TCCAACTAGGTCGTACGGACGAGCTTTGTTTGGTGGGCGAAACCCGAGAA
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 11 SEQ3#51#100#+
+#=GF SQ 5
+ath3 TTGTTTGGTGGCCGAAATCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAG
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 TTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAG
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 TTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAG
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 TTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAG
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 TTGTTTGGTGGGCGAAACCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAG
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 12 SEQ3#76#125#+
+#=GF SQ 5
+ath3 TGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCGGGGTTGTTCTCA
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 TGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCG
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 TGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCG
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 TGGTGGTTTCAGAGGTTGTGAGGAGAACACGGCTTCCGGGGTTGTTCTCA
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 TGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCCGGGTTGTTCTCA
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 13 SEQ3#101#150#+
+#=GF SQ 5
+ath3 AACACGGCTTCCGGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGC
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 CACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGC
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 CACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGC
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 AACACGGCTTCCGGGGTTGTTCTCAGCGATGTCTTTGGCCACACGGAGGC
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 AACACGGCTTCCCGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGC
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 14 SEQ3#126#175#+
+#=GF SQ 5
+ath3 GCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAG
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 GCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAG
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 GCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAG
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 GCGATGTCTTTGGCCACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAG
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 GCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAG
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 15 SEQ3#151#200#+
+#=GF SQ 5
+ath3 CAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACG
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 CTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATCACTCTGTTCACG
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 CTGTTACGCCTCCGTAGCAGCCTAGAAAGTAAAGCATTACTCTGTTCACG
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 CTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATTACTCTGTTCACG
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 CAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACG
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 16 SEQ3#176#225#+
+#=GF SQ 5
+ath3 AAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTG
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 AAAATAAAGCATCACTCTGTTCACGTCGTTCTTTAAGCCTAGCTTGGCGG
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 AAAGTAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTG
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 AAAATAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTG
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 AAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTG
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 17 SEQ3#191#240#+
+#=GF SQ 5
+ath3 TCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC
+#=GS ath3 AC ath3
+#=GS ath3 DE ath3
+bna3 TCTGTTCACGTCGTTCTTTAAGCCTAGCTTGGCGGAGAGGTAAAGGTCAC
+#=GS bna3 AC bna3
+#=GS bna3 DE bna3
+bol3 TCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC
+#=GS bol3 AC bol3
+#=GS bol3 DE bol3
+bra3 TCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC
+#=GS bra3 AC bra3
+#=GS bra3 DE bra3
+aly3 TCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC
+#=GS aly3 AC aly3
+#=GS aly3 DE aly3
+//
+# STOCKHOLM 1.0
+#=GF ID 18 SEQ4#1#50#+
+#=GF SQ 5
+ath4 A----------TGAAGGAGGACTACTTACGAGTGTTGGAAGCGGCAGCGGCAGTGACCGC
+#=GS ath4 AC ath4
+#=GS ath4 DE ath4
+bna4 ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGC
+#=GS bna4 AC bna4
+#=GS bna4 DE bna4
+bol4 ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGC
+#=GS bol4 AC bol4
+#=GS bol4 DE bol4
+bra4 ATATATGGCCTAGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGC
+#=GS bra4 AC bra4
+#=GS bra4 DE bra4
+aly4 -----------TAAAGGAGGACTACTTACGAGTGCTGTAAGCGGCAGCGGCAGTGCCCGC
+#=GS aly4 AC aly4
+#=GS aly4 DE aly4
+//
+# STOCKHOLM 1.0
+#=GF ID 19 SEQ4#26#75#+
+#=GF SQ 5
+ath4 TGGAAGCGGCAGCGGCAGTGACCGCAGCTGAAGAGCTTGTCGGAGAAACC
+#=GS ath4 AC ath4
+#=GS ath4 DE ath4
+bna4 TGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCC
+#=GS bna4 AC bna4
+#=GS bna4 DE bna4
+bol4 TGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCC
+#=GS bol4 AC bol4
+#=GS bol4 DE bol4
+bra4 TGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGCTTGTCGGAAAAGCC
+#=GS bra4 AC bra4
+#=GS bra4 DE bra4
+aly4 TGTAAGCGGCAGCGGCAGTGCCCGCAGCTGAAGAGCTTGTCGGAGAAACC
+#=GS aly4 AC aly4
+#=GS aly4 DE aly4
+//
+# STOCKHOLM 1.0
+#=GF ID 20 SEQ4#51#100#+
+#=GF SQ 5
+ath4 AGCTGAAGAGCTTGTCGGAGAAACCGACATCACCGCACATGCAACACACT
+#=GS ath4 AC ath4
+#=GS ath4 DE ath4
+bna4 AGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACT
+#=GS bna4 AC bna4
+#=GS bna4 DE bna4
+bol4 AGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACT
+#=GS bol4 AC bol4
+#=GS bol4 DE bol4
+bra4 AGCGGAATAGCTTGTCGGAAAAGCCGACGTCGCCGCACATGCAGCACACT
+#=GS bra4 AC bra4
+#=GS bra4 DE bra4
+aly4 AGCTGAAGAGCTTGTCGGAGAAACCGACGTCGCCGCACATGCAACATACT
+#=GS aly4 AC aly4
+#=GS aly4 DE aly4
+//
+# STOCKHOLM 1.0
+#=GF ID 21 SEQ4#61#110#+
+#=GF SQ 5
+ath4 CTTGTCGGAGAAACCGACATCACCGCACATGCAACACACTCTCTCCATGa
+#=GS ath4 AC ath4
+#=GS ath4 DE ath4
+bna4 TTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT
+#=GS bna4 AC bna4
+#=GS bna4 DE bna4
+bol4 TTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT
+#=GS bol4 AC bol4
+#=GS bol4 DE bol4
+bra4 CTTGTCGGAAAAGCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT
+#=GS bra4 AC bra4
+#=GS bra4 DE bra4
+aly4 CTTGTCGGAGAAACCGACGTCGCCGCACATGCAACATACTCTCTCCATGA
+#=GS aly4 AC aly4
+#=GS aly4 DE aly4
+//
+# STOCKHOLM 1.0
+#=GF ID 22 SEQ5#1#50#+
+#=GF SQ 5
+ath5 AACGGCGTCAAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGG
+#=GS ath5 AC ath5
+#=GS ath5 DE ath5
+bra5 GATCGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCC
+#=GS bra5 AC bra5
+#=GS bra5 DE bra5
+bol5 GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGgcgacatctcc
+#=GS bol5 AC bol5
+#=GS bol5 DE bol5
+bna5 GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCC
+#=GS bna5 AC bna5
+#=GS bna5 DE bna5
+aly5 AACGGCGTCGAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGG
+#=GS aly5 AC aly5
+#=GS aly5 DE aly5
+//
+# STOCKHOLM 1.0
+#=GF ID 23 SEQ5#26#75#+
+#=GF SQ 5
+ath5 TTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGT
+#=GS ath5 AC ath5
+#=GS ath5 DE ath5
+bra5 TCCCTATGCCTAACGGCGACATCTCCT------------CCTCCACCTCTATAGCTACCTGAAAACGTCGT
+#=GS bra5 AC bra5
+#=GS bra5 DE bra5
+bol5 TCCCTATGCCTAACGgcgacatctcctcctccgcctccgcctCCGCCTCTATAGCTACCGCAATACGTCGT
+#=GS bol5 AC bol5
+#=GS bol5 DE bol5
+bna5 TCCCTATGCCTAACGGCGACATCTCCT------CCTCCGCCTCCGCCTCTATAGCTACCGCAATACGTCGT
+#=GS bna5 AC bna5
+#=GS bna5 DE bna5
+aly5 TTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGT
+#=GS aly5 AC aly5
+#=GS aly5 DE aly5
+//
+# STOCKHOLM 1.0
+#=GF ID 24 SEQ5#44#93#+
+#=GF SQ 5
+ath5 A-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTGCTC
+#=GS ath5 AC ath5
+#=GS ath5 DE ath5
+bra5 ACATCTCCT------------CCTCCACCTCTATAGCTACCTGAAAACGTCGTCGCATCATCTGCTTTATC
+#=GS bra5 AC bra5
+#=GS bra5 DE bra5
+bol5 acatctcctcctccgcctccgcctCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC
+#=GS bol5 AC bol5
+#=GS bol5 DE bol5
+bna5 ACATCTCCT------CCTCCGCCTCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC
+#=GS bna5 AC bna5
+#=GS bna5 DE bna5
+aly5 A-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTTGTC
+#=GS aly5 AC aly5
+#=GS aly5 DE aly5
+//
+# STOCKHOLM 1.0
+#=GF ID 25 SEQ6#1#50#+
+#=GF SQ 5
+ath6 TAAAAAATAAAGAAT---CTTACCATCACCACGACTGTTTGTTCTAGCCAACT
+#=GS ath6 AC ath6
+#=GS ath6 DE ath6
+bol6 TAAAATGGAGAACCTTTGCTTACCATCGCCACGACGGTTTGTTCTAGGCAACT
+#=GS bol6 AC bol6
+#=GS bol6 DE bol6
+bna6 TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACT
+#=GS bna6 AC bna6
+#=GS bna6 DE bna6
+bra6 TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACT
+#=GS bra6 AC bra6
+#=GS bra6 DE bra6
+aly6 TaaaaaaaaaaGAAT---GTTACCATCACCACGACTGTTTGTTCTAGCCAACT
+#=GS aly6 AC aly6
+#=GS aly6 DE aly6
+//
+# STOCKHOLM 1.0
+#=GF ID 26 SEQ6#26#75#+
+#=GF SQ 5
+ath6 CCACGACTGTTTGTTCTAGCCAACTGATAAATAGTGTAGCCTGAAGATGA
+#=GS ath6 AC ath6
+#=GS ath6 DE ath6
+bol6 CCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGC
+#=GS bol6 AC bol6
+#=GS bol6 DE bol6
+bna6 CCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGC
+#=GS bna6 AC bna6
+#=GS bna6 DE bna6
+bra6 CCACGACGGTTTGTTCTAGGCAACTGGAAAATATCGTAGCCTGCAGTGGC
+#=GS bra6 AC bra6
+#=GS bra6 DE bra6
+aly6 CCACGACTGTTTGTTCTAGCCAACTGGAAAATAGTGTAGCCAGAAGTGCC
+#=GS aly6 AC aly6
+#=GS aly6 DE aly6
+//
+# STOCKHOLM 1.0
+#=GF ID 27 SEQ6#51#100#+
+#=GF SQ 5
+ath6 GATAAATAGTGTAGCCTGAAGATGAAAGCTGGTGGTGGTACATGTTCACT
+#=GS ath6 AC ath6
+#=GS ath6 DE ath6
+bol6 GGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACC
+#=GS bol6 AC bol6
+#=GS bol6 DE bol6
+bna6 GGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACC
+#=GS bna6 AC bna6
+#=GS bna6 DE bna6
+bra6 GGAAAATATCGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCGCC
+#=GS bra6 AC bra6
+#=GS bra6 DE bra6
+aly6 GGAAAATAGTGTAGCCAGAAGTGCCAAGCTGGTCGTGGTACATGTTGACT
+#=GS aly6 AC aly6
+#=GS aly6 DE aly6
+//
+# STOCKHOLM 1.0
+#=GF ID 28 SEQ6#68#117#+
+#=GF SQ 5
+ath6 GAAGATGAAAGCTGGTGGTGGTACATGTTCACTAATTCCTCATTCCCAAC
+#=GS ath6 AC ath6
+#=GS ath6 DE ath6
+bol6 GCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC
+#=GS bol6 AC bol6
+#=GS bol6 DE bol6
+bna6 GCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC
+#=GS bna6 AC bna6
+#=GS bna6 DE bna6
+bra6 GCAGTGGCAAGGCGATCATGGTACATGTTCGCCAATTCTTCATTTGCAAC
+#=GS bra6 AC bra6
+#=GS bra6 DE bra6
+aly6 GAAGTGCCAAGCTGGTCGTGGTACATGTTGACTAATTCCTCATTCCCAAC
+#=GS aly6 AC aly6
+#=GS aly6 DE aly6
+//
+# STOCKHOLM 1.0
+#=GF ID 29 SEQ7#1#50#+
+#=GF SQ 5
+ath7 TGGATCTCTGGATCAGCTGTGCGACCAGACGTGGATGATCCAAACGCTAA
+#=GS ath7 AC ath7
+#=GS ath7 DE ath7
+bra7 TGAAGCTCCGGGTCAGCAGTTCGGCCGGACGTTGATGATCCAAACGCTAA
+#=GS bra7 AC bra7
+#=GS bra7 DE bra7
+bol7 TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAA
+#=GS bol7 AC bol7
+#=GS bol7 DE bol7
+bna7 TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAA
+#=GS bna7 AC bna7
+#=GS bna7 DE bna7
+aly7 TGGATCTCTGGATCAGCGGTGCGGCCAGACGTGGATGATCCAAACGCTAA
+#=GS aly7 AC aly7
+#=GS aly7 DE aly7
+//
+# STOCKHOLM 1.0
+#=GF ID 30 SEQ7#26#75#+
+#=GF SQ 5
+ath7 CAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACA
+#=GS ath7 AC ath7
+#=GS ath7 DE ath7
+bra7 CGGACGTTGATGATCCAAACGCTAAACTCCTTTGAACGACTGGTCGGACG
+#=GS bra7 AC bra7
+#=GS bra7 DE bra7
+bol7 CGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACG
+#=GS bol7 AC bol7
+#=GS bol7 DE bol7
+bna7 CGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACG
+#=GS bna7 AC bna7
+#=GS bna7 DE bna7
+aly7 CAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACA
+#=GS aly7 AC aly7
+#=GS aly7 DE aly7
+//
+# STOCKHOLM 1.0
+#=GF ID 31 SEQ7#51#100#+
+#=GF SQ 5
+ath7 ACGCCTTTGAGCGATTGGTCGAACACGTGAACGCGCCGCTGAAGCAAACG
+#=GS ath7 AC ath7
+#=GS ath7 DE ath7
+bra7 ACTCCTTTGAACGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACG
+#=GS bra7 AC bra7
+#=GS bra7 DE bra7
+bol7 ACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACG
+#=GS bol7 AC bol7
+#=GS bol7 DE bol7
+bna7 ACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACG
+#=GS bna7 AC bna7
+#=GS bna7 DE bna7
+aly7 ACGCCTTTGAGCGATTGGTCGAACACGGGAACGCGCCGCAGAAGCAAACG
+#=GS aly7 AC aly7
+#=GS aly7 DE aly7
+//
+# STOCKHOLM 1.0
+#=GF ID 32 SEQ7#68#117#+
+#=GF SQ 5
+ath7 GTCGAACACGTGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGATTG---C
+#=GS ath7 AC ath7
+#=GS ath7 DE ath7
+bra7 GTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC
+#=GS bra7 AC bra7
+#=GS bra7 DE bra7
+bol7 GTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC
+#=GS bol7 AC bol7
+#=GS bol7 DE bol7
+bna7 GTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC
+#=GS bna7 AC bna7
+#=GS bna7 DE bna7
+aly7 GTCGAACACGGGAACGCGCCGCAGAAGCAAACGCTAATAATCGGGATTGCATC
+#=GS aly7 AC aly7
+#=GS aly7 DE aly7
+//
+# STOCKHOLM 1.0
+#=GF ID 33 SEQ8#1#50#+
+#=GF SQ 5
+ath8 TTGCTCTTCTAAACTCGGTCGGGGAGTATCGTCTACGATTTCTTCCGCCG
+#=GS ath8 AC ath8
+#=GS ath8 DE ath8
+bra8 TTGCTCTTCTAAACTTGACCGTGGTTGTTCATCGACGATTTCTTCTGCCG
+#=GS bra8 AC bra8
+#=GS bra8 DE bra8
+bol8 TtgttgtTctaaacttgaccgtggttGTTCGTCGACGATTTCTTCTGCCG
+#=GS bol8 AC bol8
+#=GS bol8 DE bol8
+bna8 TTGTTGTTCTAAACTTGACCGTGGTTGTTCGTCGACGATTTCTTCTGCCG
+#=GS bna8 AC bna8
+#=GS bna8 DE bna8
+aly8 TTGCTCTTCTAAACTCGGTCGTGGGGTGTCGTCTACGATTTCATCCGCCG
+#=GS aly8 AC aly8
+#=GS aly8 DE aly8
+//
+# STOCKHOLM 1.0
+#=GF ID 34 SEQ8#26#75#+
+#=GF SQ 5
+ath8 GTATCGTCTACGATTTCTTCCGCCGCCGTTTTAGGGTTTGCAACATCATC
+#=GS ath8 AC ath8
+#=GS ath8 DE ath8
+bra8 TGTTCATCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCTACATCATC
+#=GS bra8 AC bra8
+#=GS bra8 DE bra8
+bol8 tGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATC
+#=GS bol8 AC bol8
+#=GS bol8 DE bol8
+bna8 TGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATC
+#=GS bna8 AC bna8
+#=GS bna8 DE bna8
+aly8 GTGTCGTCTACGATTTCATCCGCCGCCGTTTTAGGGTTTGCAACATCATC
+#=GS aly8 AC aly8
+#=GS aly8 DE aly8
+//
+# STOCKHOLM 1.0
+#=GF ID 35 SEQ8#51#100#+
+#=GF SQ 5
+ath8 CCGTTTTAGGGTTTGCAACATCATCCATACCCTGAAACACAAGCAATCAC
+#=GS ath8 AC ath8
+#=GS ath8 DE ath8
+bra8 CCGTCTTAGGGTTCGCTACATCATCCATACCCTGAAGCATAATCAATCAC
+#=GS bra8 AC bra8
+#=GS bra8 DE bra8
+bol8 CCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAAC
+#=GS bol8 AC bol8
+#=GS bol8 DE bol8
+bna8 CCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAAC
+#=GS bna8 AC bna8
+#=GS bna8 DE bna8
+aly8 CCGTTTTAGGGTTTGCAACATCATCCATAACCTGAAACACAAGGAATCAC
+#=GS aly8 AC aly8
+#=GS aly8 DE aly8
+//
+# STOCKHOLM 1.0
+#=GF ID 36 SEQ8#68#117#+
+#=GF SQ 5
+ath8 ACATCATCCATACCCTGAAACACAAGCAATCACCAAAA---ACTTAAACAAAG
+#=GS ath8 AC ath8
+#=GS ath8 DE ath8
+bra8 ACATCATCCATACCCTGAAGCATAATCAATCACAAAACAATATTT-AAAAGAG
+#=GS bra8 AC bra8
+#=GS bra8 DE bra8
+bol8 ACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA-
+#=GS bol8 AC bol8
+#=GS bol8 DE bol8
+bna8 ACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA-
+#=GS bna8 AC bna8
+#=GS bna8 DE bna8
+aly8 ACATCATCCATAACCTGAAACACAAGGAATCAC--------ACTT-AACAGAG
+#=GS aly8 AC aly8
+#=GS aly8 DE aly8
+//
+# STOCKHOLM 1.0
+#=GF ID 37 SEQ9#1#50#+
+#=GF SQ 5
+ath9 CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGT
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 CATAGAAAAGGCTTGACCACAAACTGGA-----TATATATACCTATGAGT
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
+# STOCKHOLM 1.0
+#=GF ID 38 SEQ9#26#75#+
+#=GF SQ 5
+ath9 GGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTA
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTA
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTA
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGAAGTA
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 GGA-----TATATATACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTA
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
+# STOCKHOLM 1.0
+#=GF ID 39 SEQ9#51#100#+
+#=GF SQ 5
+ath9 TTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---TCTACAATATTGAAGCCAA
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 TTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAA
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 TTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAA
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 TTCTCGAGCTTTGTTTGAAGAAGTAATCCTCTTCCCTATAATATCCAAGCAAA
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 TTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---CCTAAAATATTGAAGCCAA
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
+# STOCKHOLM 1.0
+#=GF ID 40 SEQ9#76#125#+
+#=GF SQ 5
+ath9 ATCCTC---TCTACAATATTGAAGCCAA-CTATGGTCAAACCACAATCAAATTC
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 ATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC------
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 ATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC------
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 ATCCTCTTCCCTATAATATCCAAGCAAAGCTGTGAGTAAACTGCAACC------
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 ATCCTC---CCTAAAATATTGAAGCCAA-CTACTGTCAAACCACAATAAATTTC
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
+# STOCKHOLM 1.0
+#=GF ID 41 SEQ9#101#150#+
+#=GF SQ 5
+ath9 CTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTAC
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 CTGTGAGTAAACTACAACC----------TATTTGGGCAAA---------
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 CTGTGAGTAAACTACAACC----------TATTTGGGCAAA---------
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 CTGTGAGTAAACTGCAACC----------TAATTGGGCAAA---------
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 CTACTGTCAAACCACAATAAATTTCCCTATAGCTCCTCAAA---------
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
+# STOCKHOLM 1.0
+#=GF ID 42 SEQ9#107#156#+
+#=GF SQ 5
+ath9 TCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC
+#=GS ath9 AC ath9
+#=GS ath9 DE ath9
+bna9 GTAAACTACAACC----------TATTTGGGCAAA---------------
+#=GS bna9 AC bna9
+#=GS bna9 DE bna9
+bol9 GTAAACTACAACC----------TATTTGGGCAAA---------------
+#=GS bol9 AC bol9
+#=GS bol9 DE bol9
+bra9 GTAAACTGCAACC----------TAATTGGGCAAA---------------
+#=GS bra9 AC bra9
+#=GS bra9 DE bra9
+aly9 TCAAACCACAATAAATTTCCCTATAGCTCCTCAAA---------------
+#=GS aly9 AC aly9
+#=GS aly9 DE aly9
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_4_all.stk	Tue Jul 18 01:43:49 2017 -0400
@@ -0,0 +1,72 @@
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath1  TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACG---------GCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT 
+bol1  TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTAAGCTGTCAAGT 
+bna1  TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTAAGCTGTCAAGT 
+bra1  TTACTCGGAAGTTGTCAACTAGGGAACCGCGAGGTTTTGTCGCCTTGACGGTCTTCAACACCGTCGTCGATGGT---GGCACG---------------TGATTATAGTATGCGGTCAAGT 
+aly1  TTACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTATCGTCTTCACATCTCTCACCGCCGCCGGAGACGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTAGGCGGAGAAGT 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath2  GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG 
+bol2  AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG 
+bna2  AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAGCGCCATGTCTTTAAACCGGTCTCG 
+bra2  AGCAGAGAACGGAACCGGTAAAATCGAACCGGTTGAACCGGAGCTGACCACGTTCTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG 
+aly2  GGTCGAGAAAGGAACCGGCAAAATCGAACCGGAAACATTCGAGCTAACCACGTTTTGCATAAACTGGTACATAAGCAAAACGTCACCGTTAACCAAAGCCACGTCCTTAAACCGGTCTCG 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath3  CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGCGCTTTGTTTGGTGGCCGAAATCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCGGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC 
+bna3  CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATCACTCTGTTCACGTCGTTCTTTAAGCCTAGCTTGGCGGAGAGGTAAAGGTCAC 
+bol3  CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGAGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAGTAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC 
+bra3  CTGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGAACACGGCTTCCGGGGTTGTTCTCAGCGATGTCTTTGGCCACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC 
+aly3  CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGAGCTTTGTTTGGTGGGCGAAACCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCCGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath4  A----------TGAAGGAGGACTACTTACGAGTGTTGGAAGCGGCAGCGGCAGTGACCGCAGCTGAAGAGCTTGTCGGAGAAACCGACATCACCGCACATGCAACACACTCTCTCCATGa 
+bna4  ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT 
+bol4  ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT 
+bra4  ATATATGGCCTAGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGCTTGTCGGAAAAGCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT 
+aly4  -----------TAAAGGAGGACTACTTACGAGTGCTGTAAGCGGCAGCGGCAGTGCCCGCAGCTGAAGAGCTTGTCGGAGAAACCGACGTCGCCGCACATGCAACATACTCTCTCCATGA 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath5  AACGGCGTCAAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTGCTC 
+bra5  GATCGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCCT------------CCTCCACCTCTATAGCTACCTGAAAACGTCGTCGCATCATCTGCTTTATC 
+bol5  GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGgcgacatctcctcctccgcctccgcctCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC 
+bna5  GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCCT------CCTCCGCCTCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC 
+aly5  AACGGCGTCGAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTTGTC 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath6  TAAAAAATAAAGAAT---CTTACCATCACCACGACTGTTTGTTCTAGCCAACTGATAAATAGTGTAGCCTGAAGATGAAAGCTGGTGGTGGTACATGTTCACTAATTCCTCATTCCCAAC 
+bol6  TAAAATGGAGAACCTTTGCTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC 
+bna6  TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC 
+bra6  TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATCGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCGCCAATTCTTCATTTGCAAC 
+aly6  TaaaaaaaaaaGAAT---GTTACCATCACCACGACTGTTTGTTCTAGCCAACTGGAAAATAGTGTAGCCAGAAGTGCCAAGCTGGTCGTGGTACATGTTGACTAATTCCTCATTCCCAAC 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath7  TGGATCTCTGGATCAGCTGTGCGACCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGTGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGATTG---C 
+bra7  TGAAGCTCCGGGTCAGCAGTTCGGCCGGACGTTGATGATCCAAACGCTAAACTCCTTTGAACGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC 
+bol7  TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC 
+bna7  TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC 
+aly7  TGGATCTCTGGATCAGCGGTGCGGCCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGGGAACGCGCCGCAGAAGCAAACGCTAATAATCGGGATTGCATC 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath8  TTGCTCTTCTAAACTCGGTCGGGGAGTATCGTCTACGATTTCTTCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATACCCTGAAACACAAGCAATCACCAAAA---ACTTAAACAAAG 
+bra8  TTGCTCTTCTAAACTTGACCGTGGTTGTTCATCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCTACATCATCCATACCCTGAAGCATAATCAATCACAAAACAATATTT-AAAAGAG 
+bol8  TtgttgtTctaaacttgaccgtggttGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA- 
+bna8  TTGTTGTTCTAAACTTGACCGTGGTTGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA- 
+aly8  TTGCTCTTCTAAACTCGGTCGTGGGGTGTCGTCTACGATTTCATCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATAACCTGAAACACAAGGAATCAC--------ACTT-AACAGAG 
+//
+# STOCKHOLM 1.0
+#=GF SQ 5
+ath9  CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---TCTACAATATTGAAGCCAA-CTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC 
+bna9  C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC----------TATTTGGGCAAA--------------- 
+bol9  C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC----------TATTTGGGCAAA--------------- 
+bra9  C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGAAGTAATCCTCTTCCCTATAATATCCAAGCAAAGCTGTGAGTAAACTGCAACC----------TAATTGGGCAAA--------------- 
+aly9  CATAGAAAAGGCTTGACCACAAACTGGA-----TATATATACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---CCTAAAATATTGAAGCCAA-CTACTGTCAAACCACAATAAATTTCCCTATAGCTCCTCAAA--------------- 
+//
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_4_representatives.fa	Tue Jul 18 01:43:49 2017 -0400
@@ -0,0 +1,18 @@
+>ath1
+TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACGGCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT
+>ath2
+GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG
+>ath3
+CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGCGCTTTGTTTGGTGGCCGAAATCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCGGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC
+>ath4
+ATGAAGGAGGACTACTTACGAGTGTTGGAAGCGGCAGCGGCAGTGACCGCAGCTGAAGAGCTTGTCGGAGAAACCGACATCACCGCACATGCAACACACTCTCTCCATGa
+>ath5
+AACGGCGTCAAGGATCTCCTTCAAATTCTTGTGCCTTACCACCAATTTGGTCTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTGCTC
+>ath6
+TAAAAAATAAAGAATCTTACCATCACCACGACTGTTTGTTCTAGCCAACTGATAAATAGTGTAGCCTGAAGATGAAAGCTGGTGGTGGTACATGTTCACTAATTCCTCATTCCCAAC
+>ath7
+TGGATCTCTGGATCAGCTGTGCGACCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGTGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGATTGC
+>ath8
+TTGCTCTTCTAAACTCGGTCGGGGAGTATCGTCTACGATTTCTTCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATACCCTGAAACACAAGCAATCACCAAAAACTTAAACAAAG
+>ath9
+CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTCTCTACAATATTGAAGCCAACTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC