Previous changeset 10:16bcaef3dc1e (2017-06-01) Next changeset 12:8a1786cdcf95 (2017-11-20) |
Commit message:
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust commit 746497a64b955f6b9afc1944d1c1d8d877e53267 |
modified:
preprocessing.xml splitSHAPE.py |
added:
splitStockholm.py test-data/sample_4_alignment_data_split.stk test-data/sample_4_all.stk test-data/sample_4_representatives.fa |
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 preprocessing.xml --- a/preprocessing.xml Thu Jun 01 12:11:37 2017 -0400 +++ b/preprocessing.xml Tue Jul 18 01:43:49 2017 -0400 |
b |
@@ -1,7 +1,8 @@ -<tool id="preproc" name="Preprocessing" version="0.2"> +<tool id="preproc" name="Preprocessing" version="0.3"> <requirements> - <requirement type="package" version="0.1.12">graphclust-wrappers</requirement> + <requirement type="package" version="0.3.1">graphclust-wrappers</requirement> <requirement type="package" version="3.0">zip</requirement> + <requirement type="package" version="1.70">biopython</requirement> </requirements> <stdio> @@ -19,13 +20,22 @@ && python '$__tool_directory__/splitSHAPE.py' '$SHAPEdata' - $max_length + #end if + + #if $AlignmentData: + && + python '$__tool_directory__/splitStockholm.py' + '$AlignmentData' + + #end if + ]]> </command> <inputs> <param type="data" name="fastaFile" format="fasta" /> <param type="data" name="SHAPEdata" format="txt" optional="true" label="SHAPE data"/> + <param type="data" name="AlignmentData" format="stockholm" optional="true" label="Alignments file"/> <param name="max_length" type="integer" value="10000" size="5" label="window size"/> <param name="in_winShift" type="integer" value="100" size="5" label="window shift in percent"/> <param name="min_seq_length" type="integer" value="5" size="5" label="minimum sequence length"/> @@ -36,7 +46,8 @@ <data name="data.names" format="txt" from_work_dir="FASTA/data.names" label="data.names"/> <data name="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan" label="data.fasta.scan"/> <data name="FASTA" format="zip" from_work_dir="FASTA.zip" label="FASTA.ZIP"/> - <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE data splited"/> + <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE.data.split"/> + <data name="alignment_data_split" format="stockholm" from_work_dir="alignment_data_split.stk" label="alignments.data.stk"/> </outputs> <tests> <test> @@ -57,6 +68,14 @@ <param name="in_winShift" value="50"/> <param name="min_seq_length" value="5"/> <output 
name="shape_data_split" file="sample_3_shape_data_split.react" /> + </test> + <test> + <param name="fastaFile" value="sample_4_representatives.fa"/> + <param name="AlignmentData" value="sample_4_all.stk"/> + <param name="max_length" value="50"/> + <param name="in_winShift" value="50"/> + <param name="min_seq_length" value="5"/> + <output name="alignment_data_split" file="sample_4_alignment_data_split.stk" /> </test> </tests> <help> |
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 splitSHAPE.py --- a/splitSHAPE.py Thu Jun 01 12:11:37 2017 -0400 +++ b/splitSHAPE.py Tue Jul 18 01:43:49 2017 -0400 |
[ |
@@ -3,7 +3,6 @@ import sys shape_file = sys.argv[1] -win_size = int(sys.argv[2]) pattern = re.compile("^>.*$") toWrite = "" @@ -18,9 +17,9 @@ name_file = "FASTA/data.names" array_all_chunks = [] with open(name_file, 'r') as f: - content = f.read() - lines = content.split('\n')[:-1] - for line in lines: + for line in f: + if len(line.strip()) == 0: + continue seq_id.append(int(line.split()[0])) seq_string.append(line.split()[1]) orig_id_srt = line.split()[3] |
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 splitStockholm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splitStockholm.py Tue Jul 18 01:43:49 2017 -0400 |
[ |
#!/usr/bin/env python

########
# This script reads multiple alignments merged in a single Stockholm file
# and splits the alignment blocks according to the data.names table.
# The first sequence of each alignment is assumed to match the names table
# entries.
# Author: M. Miladi
########
import os
import re
import sys

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Table written by the preprocessing step; one row per sequence window.
NAMES_FILE = "FASTA/data.names"
# Output file collected by the Galaxy wrapper (from_work_dir).
TARGET_FILE = "alignment_data_split.stk"
# Symbols treated as gaps in the first (reference) row of an alignment.
GAP_CHARS = '-.~_'


def parse_names_lines(lines):
    """Parse rows of the data.names table.

    Each non-blank row is split on whitespace. Column 0 is read as the
    integer window id, column 1 as the window label, and column 3 as the
    original sequence id, from which the text after the last underscore
    is removed.

    :param lines: iterable of text lines.
    :return: list of ``(seq_id, seq_string, orig_id)`` tuples in input order.
    :raises IndexError: if a non-blank row has fewer than four columns.
    :raises ValueError: if column 0 is not an integer.
    """
    entries = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        orig_id = fields[3].rsplit('_', 1)[0]
        entries.append((int(fields[0]), fields[1], orig_id))
    return entries


def read_names_table(path):
    """Read the data.names table at *path*; see :func:`parse_names_lines`."""
    with open(path, 'r') as handle:
        return parse_names_lines(handle)


def parse_window(seq_string):
    """Return ``(start, end)`` from a window label.

    The second and third runs of digits in the label are used, e.g.
    ``SEQ1#26#75#+`` gives ``(26, 75)``.
    NOTE(review): assumes the label's name part contains exactly one run of
    digits -- confirm against the producer of data.names.

    :raises IndexError: if the label has fewer than three digit runs.
    """
    numbers = re.findall(r'\d+', seq_string)
    return int(numbers[1]), int(numbers[2])


def gapped_column_span(first_row, start, end, gap_chars=GAP_CHARS):
    """Map an ungapped window onto alignment columns.

    :param first_row: gapped text of the first sequence of the alignment.
    :param start: 1-based first ungapped position of the window.
    :param end: 1-based last ungapped position of the window (inclusive).
    :param gap_chars: characters counted as gaps.
    :return: ``(col_start, col_end)`` usable as the half-open column slice
        ``[col_start:col_end]`` that covers the window.
    :raises ValueError: if either position lies outside the ungapped row.
    """
    # columns[k] is the alignment column holding the k-th ungapped residue.
    columns = [col for col, symbol in enumerate(first_row)
               if symbol not in gap_chars]
    for position in (start, end):
        if not 1 <= position <= len(columns):
            raise ValueError(
                "window position {} is outside the ungapped sequence "
                "(length {})".format(position, len(columns)))
    return columns[start - 1], columns[end - 1] + 1


def insert_gf_id(stk_text, gf_id):
    """Return *stk_text* with a ``#=GF ID`` line added as its second line.

    Biopython does not write the GF ID entry of an alignment, so it is
    inserted manually right after the first line of the record.
    """
    first_line, _, remainder = stk_text.partition('\n')
    return '{}\n#=GF ID {}\n{}'.format(first_line, gf_id, remainder)


def main(argv=None):
    """Split the Stockholm file named in ``argv[1]`` into per-window records.

    Reads ``FASTA/data.names`` from the working directory and writes
    ``alignment_data_split.stk`` there.

    :param argv: argument vector; defaults to ``sys.argv``.
    :raises KeyError: if a names-table entry has no matching alignment.
    """
    # Imported lazily so the pure helpers above stay importable (and
    # testable) on systems without Biopython.
    from Bio import AlignIO

    argv = sys.argv if argv is None else argv
    stk_file = argv[1]
    print("Parsing and splitting stk file:{}".format(stk_file))

    entries = read_names_table(NAMES_FILE)

    # Index every alignment by the id of its first sequence. The parser is
    # lazy, so it must be consumed while the input file is still open.
    with open(stk_file) as stk_in:
        alignments_by_id = {
            alignment[0].id: alignment
            for alignment in AlignIO.parse(stk_in, "stockholm")
        }

    with open(TARGET_FILE, 'w') as out_stk_handle:
        for seq_id, seq_string, orig_id in entries:
            cur_alignment = alignments_by_id.get(orig_id)
            if cur_alignment is None:
                raise KeyError(
                    "no alignment whose first sequence is '{}' "
                    "(needed for {})".format(orig_id, seq_string))

            gf_id = "{} {}".format(seq_id, seq_string)
            start, end = parse_window(seq_string)
            print("{} [{}, {}]".format(gf_id, start, end))

            col_start, col_end = gapped_column_span(
                str(cur_alignment[0].seq), start, end)
            subalign = cur_alignment[:, col_start:col_end]

            siotmp = StringIO()
            AlignIO.write(subalign, siotmp, format="stockholm")
            out_stk_handle.write(insert_gf_id(siotmp.getvalue(), gf_id))


if __name__ == "__main__":
    main()
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 test-data/sample_4_alignment_data_split.stk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_4_alignment_data_split.stk Tue Jul 18 01:43:49 2017 -0400 |
b |
b'@@ -0,0 +1,798 @@\n+# STOCKHOLM 1.0\n+#=GF ID 1 SEQ1#1#50#+\n+#=GF SQ 5\n+ath1 TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACG\n+#=GS ath1 AC ath1\n+#=GS ath1 DE ath1\n+bol1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACG\n+#=GS bol1 AC bol1\n+#=GS bol1 DE bol1\n+bna1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACG\n+#=GS bna1 AC bna1\n+#=GS bna1 DE bna1\n+bra1 TTACTCGGAAGTTGTCAACTAGGGAACCGCGAGGTTTTGTCGCCTTGACG\n+#=GS bra1 AC bra1\n+#=GS bra1 DE bra1\n+aly1 TTACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTATCGTCTTCACA\n+#=GS aly1 AC aly1\n+#=GS aly1 DE aly1\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 2 SEQ1#26#75#+\n+#=GF SQ 5\n+ath1 AACTTGAGGTTTTGTCGTTTCCACG---------GCTGTCGTAGACGGTGGCAGCTGCT\n+#=GS ath1 AC ath1\n+#=GS ath1 DE ath1\n+bol1 ACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGTGGCACG-\n+#=GS bol1 AC bol1\n+#=GS bol1 DE bol1\n+bna1 ACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGCGGCACG-\n+#=GS bna1 AC bna1\n+#=GS bna1 DE bna1\n+bra1 ACCGCGAGGTTTTGTCGCCTTGACGGTCTTCAACACCGTCGTCGATGGT---GGCACG-\n+#=GS bra1 AC bra1\n+#=GS bra1 DE bra1\n+aly1 AACTTGAGGTTTTATCGTCTTCACATCTCTCACCGCCGCCGGAGACGGTGGCTGCTGCT\n+#=GS aly1 AC aly1\n+#=GS aly1 DE aly1\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 3 SEQ1#51#100#+\n+#=GF SQ 5\n+ath1 GCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTA\n+#=GS ath1 AC ath1\n+#=GS ath1 DE ath1\n+bol1 ACCGCTGTTGATGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTA\n+#=GS bol1 AC bol1\n+#=GS bol1 DE bol1\n+bna1 ACCGCTGTTGATGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTA\n+#=GS bna1 AC bna1\n+#=GS bna1 DE bna1\n+bra1 ACCGTCGTCGATGGT---GGCACG---------------TGATTATAGTA\n+#=GS bra1 AC bra1\n+#=GS bra1 DE bra1\n+aly1 GCCGCCGGAGACGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTA\n+#=GS aly1 AC aly1\n+#=GS aly1 DE aly1\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 4 SEQ1#62#111#+\n+#=GF SQ 5\n+ath1 CGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT\n+#=GS ath1 AC ath1\n+#=GS ath1 DE ath1\n+bol1 TGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTAAGCTGTCAAGT\n+#=GS bol1 AC bol1\n+#=GS bol1 DE bol1\n+bna1 
TGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTAAGCTGTCAAGT\n+#=GS bna1 AC bna1\n+#=GS bna1 DE bna1\n+bra1 TGGT---GGCACG---------------TGATTATAGTATGCGGTCAAGT\n+#=GS bra1 AC bra1\n+#=GS bra1 DE bra1\n+aly1 CGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTAGGCGGAGAAGT\n+#=GS aly1 AC aly1\n+#=GS aly1 DE aly1\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 5 SEQ2#1#50#+\n+#=GF SQ 5\n+ath2 GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCA\n+#=GS ath2 AC ath2\n+#=GS ath2 DE ath2\n+bol2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCA\n+#=GS bol2 AC bol2\n+#=GS bol2 DE bol2\n+bna2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCA\n+#=GS bna2 AC bna2\n+#=GS bna2 DE bna2\n+bra2 AGCAGAGAACGGAACCGGTAAAATCGAACCGGTTGAACCGGAGCTGACCA\n+#=GS bra2 AC bra2\n+#=GS bra2 DE bra2\n+aly2 GGTCGAGAAAGGAACCGGCAAAATCGAACCGGAAACATTCGAGCTAACCA\n+#=GS aly2 AC aly2\n+#=GS aly2 DE aly2\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 6 SEQ2#26#75#+\n+#=GF SQ 5\n+ath2 GAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAG\n+#=GS ath2 AC ath2\n+#=GS ath2 DE ath2\n+bol2 GAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAG\n+#=GS bol2 AC bol2\n+#=GS bol2 DE bol2\n+bna2 GAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAG\n+#=GS bna2 AC bna2\n+#=GS bna2 DE bna2\n+bra2 GAACCGGTTGAACCGGAGCTGACCACGTTCTGCATAAACTGGTACAAAAG\n+#=GS bra2 AC bra2\n+#=GS bra2 DE bra2\n+aly2 GAACCGGAAACATTCGAGCTAACCACGTTTTGCATAAACTGGTACATAAG\n+#=GS aly2 AC aly2\n+#=GS aly2 DE aly2\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 7 SEQ2#51#100#+\n+#=GF SQ 5\n+ath2 CGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCC\n+#=GS ath2 AC ath2\n+#=GS ath2 DE ath2\n+bol2 CGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCC\n+#=GS bol2 AC bol2\n+#=GS bol2 DE bol2\n+bna2 CGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAGCGCC\n+#=GS bna2 AC bna2\n+#=GS bna2 DE bna2\n+bra2 CGTTCTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCC\n+#=GS bra2 AC bra2\n+#=GS bra2 DE bra2\n+aly2 CGTTTTGCATAAACTGGTACATAAGCAAAACGTCACCGTTAACCAAAGCC\n+#=GS aly2 AC aly2\n+#=GS aly2 DE aly2\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 8 SEQ2#71#120#+\n+#=GF SQ 
5\n+ath2 ATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG\n+#=GS ath2 AC ath2\n+#=GS ath2 DE ath2\n+bol2 AAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG\n+#=GS bol2 AC'..b' bol8\n+#=GS bol8 DE bol8\n+bna8 CCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAAC\n+#=GS bna8 AC bna8\n+#=GS bna8 DE bna8\n+aly8 CCGTTTTAGGGTTTGCAACATCATCCATAACCTGAAACACAAGGAATCAC\n+#=GS aly8 AC aly8\n+#=GS aly8 DE aly8\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 36 SEQ8#68#117#+\n+#=GF SQ 5\n+ath8 ACATCATCCATACCCTGAAACACAAGCAATCACCAAAA---ACTTAAACAAAG\n+#=GS ath8 AC ath8\n+#=GS ath8 DE ath8\n+bra8 ACATCATCCATACCCTGAAGCATAATCAATCACAAAACAATATTT-AAAAGAG\n+#=GS bra8 AC bra8\n+#=GS bra8 DE bra8\n+bol8 ACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA-\n+#=GS bol8 AC bol8\n+#=GS bol8 DE bol8\n+bna8 ACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA-\n+#=GS bna8 AC bna8\n+#=GS bna8 DE bna8\n+aly8 ACATCATCCATAACCTGAAACACAAGGAATCAC--------ACTT-AACAGAG\n+#=GS aly8 AC aly8\n+#=GS aly8 DE aly8\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 37 SEQ9#1#50#+\n+#=GF SQ 5\n+ath9 CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGT\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGT\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 CATAGAAAAGGCTTGACCACAAACTGGA-----TATATATACCTATGAGT\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 38 SEQ9#26#75#+\n+#=GF SQ 5\n+ath9 GGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTA\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTA\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTA\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 AAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGAAGTA\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 
GGA-----TATATATACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTA\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 39 SEQ9#51#100#+\n+#=GF SQ 5\n+ath9 TTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---TCTACAATATTGAAGCCAA\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 TTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAA\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 TTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAA\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 TTCTCGAGCTTTGTTTGAAGAAGTAATCCTCTTCCCTATAATATCCAAGCAAA\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 TTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---CCTAAAATATTGAAGCCAA\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 40 SEQ9#76#125#+\n+#=GF SQ 5\n+ath9 ATCCTC---TCTACAATATTGAAGCCAA-CTATGGTCAAACCACAATCAAATTC\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 ATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC------\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 ATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC------\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 ATCCTCTTCCCTATAATATCCAAGCAAAGCTGTGAGTAAACTGCAACC------\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 ATCCTC---CCTAAAATATTGAAGCCAA-CTACTGTCAAACCACAATAAATTTC\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 41 SEQ9#101#150#+\n+#=GF SQ 5\n+ath9 CTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTAC\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 CTGTGAGTAAACTACAACC----------TATTTGGGCAAA---------\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 CTGTGAGTAAACTACAACC----------TATTTGGGCAAA---------\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 CTGTGAGTAAACTGCAACC----------TAATTGGGCAAA---------\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 CTACTGTCAAACCACAATAAATTTCCCTATAGCTCCTCAAA---------\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n+# STOCKHOLM 1.0\n+#=GF ID 42 SEQ9#107#156#+\n+#=GF SQ 5\n+ath9 TCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC\n+#=GS ath9 AC ath9\n+#=GS ath9 DE ath9\n+bna9 
GTAAACTACAACC----------TATTTGGGCAAA---------------\n+#=GS bna9 AC bna9\n+#=GS bna9 DE bna9\n+bol9 GTAAACTACAACC----------TATTTGGGCAAA---------------\n+#=GS bol9 AC bol9\n+#=GS bol9 DE bol9\n+bra9 GTAAACTGCAACC----------TAATTGGGCAAA---------------\n+#=GS bra9 AC bra9\n+#=GS bra9 DE bra9\n+aly9 TCAAACCACAATAAATTTCCCTATAGCTCCTCAAA---------------\n+#=GS aly9 AC aly9\n+#=GS aly9 DE aly9\n+//\n' |
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 test-data/sample_4_all.stk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_4_all.stk Tue Jul 18 01:43:49 2017 -0400 |
b |
@@ -0,0 +1,72 @@ +# STOCKHOLM 1.0 +#=GF SQ 5 +ath1 TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACG---------GCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT +bol1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGTGGCACG---TGTAACGTTTGGTGGTTATAGTAAGCTGTCAAGT +bna1 TTACTTTGAAGTTGTCAACTAGGCAACCGCGAGGTTTTGTCTCCTTGACGGTCTTCAACACCGCTGTTGATGGTGGCGGCACG---TGTAACGTTTGATGGTTATAGTAAGCTGTCAAGT +bra1 TTACTCGGAAGTTGTCAACTAGGGAACCGCGAGGTTTTGTCGCCTTGACGGTCTTCAACACCGTCGTCGATGGT---GGCACG---------------TGATTATAGTATGCGGTCAAGT +aly1 TTACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTATCGTCTTCACATCTCTCACCGCCGCCGGAGACGGTGGCTGCTGCTGCTGCAGCGGTTGATGAGAGTAGTAGGCGGAGAAGT +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath2 GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG +bol2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG +bna2 AGCAGAGAACGGAACCGGTAGAATTGAACCGGTTGAACCGGAGCTGACCACGTTTTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAGCGCCATGTCTTTAAACCGGTCTCG +bra2 AGCAGAGAACGGAACCGGTAAAATCGAACCGGTTGAACCGGAGCTGACCACGTTCTGCATAAACTGGTACAAAAGCAAGACATCGCCGTTCACCAACGCCATGTCTTTAAACCGGTCTCG +aly2 GGTCGAGAAAGGAACCGGCAAAATCGAACCGGAAACATTCGAGCTAACCACGTTTTGCATAAACTGGTACATAAGCAAAACGTCACCGTTAACCAAAGCCACGTCCTTAAACCGGTCTCG +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath3 CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGCGCTTTGTTTGGTGGCCGAAATCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCGGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC +bna3 CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATCACTCTGTTCACGTCGTTCTTTAAGCCTAGCTTGGCGGAGAGGTAAAGGTCAC +bol3 
CCGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGAGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGCACACGGCTTCCGGGGTTGTTCTCGGCTATGTCTTTGGCGACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAGTAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC +bra3 CTGCACCGTCCCCGAACAAAGCTGCCCCGACCAGGTCGTAAGGGCGAGCTTTGTTCGGTGGTCGGAACCCGAGGATGGTGGTTTCAGAGGTTGTGAGGAGAACACGGCTTCCGGGGTTGTTCTCAGCGATGTCTTTGGCCACACGGAGGCCTGTTACGCCTCCGTAGCAGCCTAGAAAATAAAGCATTACTCTGTTCACGTCGCTCTTTAGGCCTAGCTTGGCTGAGAGGTAAAGGTCGC +aly3 CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGAGCTTTGTTTGGTGGGCGAAACCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCCGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath4 A----------TGAAGGAGGACTACTTACGAGTGTTGGAAGCGGCAGCGGCAGTGACCGCAGCTGAAGAGCTTGTCGGAGAAACCGACATCACCGCACATGCAACACACTCTCTCCATGa +bna4 ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT +bol4 ATATATAGCCTTGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGTTTGTCGGAAAATCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT +bra4 ATATATGGCCTAGGAGAAGG--TACGTACGAGTGTTGAAAGCGGTTGCGGCAGTGCCCGCAGCGGAATAGCTTGTCGGAAAAGCCGACGTCGCCGCACATGCAGCACACTTTCTCCATGT +aly4 -----------TAAAGGAGGACTACTTACGAGTGCTGTAAGCGGCAGCGGCAGTGCCCGCAGCTGAAGAGCTTGTCGGAGAAACCGACGTCGCCGCACATGCAACATACTCTCTCCATGA +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath5 AACGGCGTCAAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTGCTC +bra5 GATCGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCCT------------CCTCCACCTCTATAGCTACCTGAAAACGTCGTCGCATCATCTGCTTTATC +bol5 GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGgcgacatctcctcctccgcctccgcctCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC +bna5 GATGGCATCAGCGATCTCTTTCAAATCCCTATGCCTAACGGCGACATCTCCT------CCTCCGCCTCCGCCTCTATAGCTACCGCAATACGTCGTCGCATCATCTTCTTTATC +aly5 
AACGGCGTCGAGGATCTCCTTCAAATTCTTGTGCCTTACCACCA-ATTTGGT--------------------CTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTTGTC +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath6 TAAAAAATAAAGAAT---CTTACCATCACCACGACTGTTTGTTCTAGCCAACTGATAAATAGTGTAGCCTGAAGATGAAAGCTGGTGGTGGTACATGTTCACTAATTCCTCATTCCCAAC +bol6 TAAAATGGAGAACCTTTGCTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC +bna6 TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATTGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCACCAATTCTTCATTTGCAAC +bra6 TAAAATGGAGAACCTTTACTTACCATCGCCACGACGGTTTGTTCTAGGCAACTGGAAAATATCGTAGCCTGCAGTGGCAAGGCGATCATGGTACATGTTCGCCAATTCTTCATTTGCAAC +aly6 TaaaaaaaaaaGAAT---GTTACCATCACCACGACTGTTTGTTCTAGCCAACTGGAAAATAGTGTAGCCAGAAGTGCCAAGCTGGTCGTGGTACATGTTGACTAATTCCTCATTCCCAAC +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath7 TGGATCTCTGGATCAGCTGTGCGACCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGTGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGATTG---C +bra7 TGAAGCTCCGGGTCAGCAGTTCGGCCGGACGTTGATGATCCAAACGCTAAACTCCTTTGAACGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC +bol7 TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC +bna7 TGAAGCTCCGGATCAGCAGCTCGGCCGGACGTTGAAGATCCAAACGCTAAACGACTTTGAGCGACTGGTCGGACGCGGGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGACTGCATC +aly7 TGGATCTCTGGATCAGCGGTGCGGCCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGGGAACGCGCCGCAGAAGCAAACGCTAATAATCGGGATTGCATC +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath8 TTGCTCTTCTAAACTCGGTCGGGGAGTATCGTCTACGATTTCTTCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATACCCTGAAACACAAGCAATCACCAAAA---ACTTAAACAAAG +bra8 TTGCTCTTCTAAACTTGACCGTGGTTGTTCATCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCTACATCATCCATACCCTGAAGCATAATCAATCACAAAACAATATTT-AAAAGAG +bol8 TtgttgtTctaaacttgaccgtggttGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA- +bna8 TTGTTGTTCTAAACTTGACCGTGGTTGTTCGTCGACGATTTCTTCTGCCGCCGTCTTAGGGTTCGCCACGTCATCCATACCCTGAAGCATAATCAATAACAAAACAATATTT-AAAAGA- +aly8 
TTGCTCTTCTAAACTCGGTCGTGGGGTGTCGTCTACGATTTCATCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATAACCTGAAACACAAGGAATCAC--------ACTT-AACAGAG +// +# STOCKHOLM 1.0 +#=GF SQ 5 +ath9 CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---TCTACAATATTGAAGCCAA-CTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC +bna9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC----------TATTTGGGCAAA--------------- +bol9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGTAGTAATCCTCTTCCCTGTAATATTCAAGCAAAGCTGTGAGTAAACTACAACC----------TATTTGGGCAAA--------------- +bra9 C-TACCAAATGC--AAAGATAAGGCAAA------------ACCTATGAGTTTCTCGAGCTTTGTTTGAAGAAGTAATCCTCTTCCCTATAATATCCAAGCAAAGCTGTGAGTAAACTGCAACC----------TAATTGGGCAAA--------------- +aly9 CATAGAAAAGGCTTGACCACAAACTGGA-----TATATATACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTC---CCTAAAATATTGAAGCCAA-CTACTGTCAAACCACAATAAATTTCCCTATAGCTCCTCAAA--------------- +// |
b |
diff -r 16bcaef3dc1e -r c0c9d19bc7b2 test-data/sample_4_representatives.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_4_representatives.fa Tue Jul 18 01:43:49 2017 -0400 |
b |
@@ -0,0 +1,18 @@ +>ath1 +TAACTCGGAAGTTGTCGATTGAACAAACTTGAGGTTTTGTCGTTTCCACGGCTGTCGTAGACGGTGGCAGCTGCTGCTGCAGCGGTTGATGATAGTGGTAGGCGGAGAAGT +>ath2 +GGTCGAGAAAGGAACCGGCAATATCGAACCGGAAAAATCCGAGATAACCACGTTTTGCATAAACTGGTACATAAGCAGAACGTCACCGTTAACCAAAGCCATGTCCTTAAACCGGTCTCG +>ath3 +CTGCACCATCTCCAAACAGAGCAGCTCCAACTAGGTCGTACGGACGCGCTTTGTTTGGTGGCCGAAATCCGAGAATGGTGGTTTCAGAGGTTGTGAGCAGAACACGGCTTCCGGGGTTGTTCTCAGCAATGTCTTTGGCCACACGGAGGCCAGTCACACCTCCGTAGCATCCCAGAAAATACAGCATCACTCTGTTCACGTCATTCCTCAGGCCTAGCTTTGCTGAGAGGTAAAGGTCAC +>ath4 +ATGAAGGAGGACTACTTACGAGTGTTGGAAGCGGCAGCGGCAGTGACCGCAGCTGAAGAGCTTGTCGGAGAAACCGACATCACCGCACATGCAACACACTCTCTCCATGa +>ath5 +AACGGCGTCAAGGATCTCCTTCAAATTCTTGTGCCTTACCACCAATTTGGTCTTGTAGCAGTCGGAAGAAGTGGTAACATGGTCAGAGTGCTC +>ath6 +TAAAAAATAAAGAATCTTACCATCACCACGACTGTTTGTTCTAGCCAACTGATAAATAGTGTAGCCTGAAGATGAAAGCTGGTGGTGGTACATGTTCACTAATTCCTCATTCCCAAC +>ath7 +TGGATCTCTGGATCAGCTGTGCGACCAGACGTGGATGATCCAAACGCTAAACGCCTTTGAGCGATTGGTCGAACACGTGAACGCGCCGCTGAAGCAAACGCTAATAATCGGGATTGC +>ath8 +TTGCTCTTCTAAACTCGGTCGGGGAGTATCGTCTACGATTTCTTCCGCCGCCGTTTTAGGGTTTGCAACATCATCCATACCCTGAAACACAAGCAATCACCAAAAACTTAAACAAAG +>ath9 +CGTAGAAAAGGCTTGACCGCAAAATGGATATATTATATGTACCTATGAGTTTTTCGAGCTTTGTTTGAAGTAGTAATCCTCTCTACAATATTGAAGCCAACTATGGTCAAACCACAATCAAATTCCCTATAGCTCCTCaaaaaaaaCTACTCAAGC |