Mercurial > repos > greg > plant_tribes_gene_family_aligner
changeset 6:3384b6a842b0 draft
Uploaded
author | greg |
---|---|
date | Mon, 30 Oct 2017 09:52:00 -0400 (2017-10-30) |
parents | a73c2e65098e |
children | 2ac7090847f9 |
files | .shed.yml gene_family_aligner.py gene_family_aligner.xml macros.xml test-data/3722.faa test-data/3722.faa.aln test-data/3722.fna test-data/3722.fna.aln test-data/38889.faa test-data/38889.faa.aln test-data/38889.fna test-data/38889.fna.aln test-data/39614.faa test-data/39614.faa.aln test-data/39614.fna test-data/39614.fna.aln utils.py |
diffstat | 17 files changed, 2094 insertions(+), 281 deletions(-) [+] |
line wrap: on
line diff
--- a/.shed.yml Fri Aug 25 13:01:25 2017 -0400 +++ b/.shed.yml Mon Oct 30 09:52:00 2017 -0400 @@ -7,7 +7,7 @@ Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool aligns gene family sequences. -remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_aligner +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_aligner type: unrestricted categories: - Phylogenetics
--- a/gene_family_aligner.py Fri Aug 25 13:01:25 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -#!/usr/bin/env python -import argparse -import os - -import utils - -OUTPUT_DIR = 'geneFamilyAlignments_dir' - -parser = argparse.ArgumentParser() -parser.add_argument('--alignment_method', dest='alignment_method', help='Multiple sequence alignments method') -parser.add_argument('--automated_trimming', dest='automated_trimming', default=None, help='Trims alignments using trimAls ML heuristic trimming approach') -parser.add_argument('--codon_alignments', dest='codon_alignments', default=None, help='Flag for constructing orthogroup multiple codon alignments') -parser.add_argument('--gap_trimming', dest='gap_trimming', default=0, type=float, help='Remove sites in alignments with gaps of') -parser.add_argument('--iterative_realignment', dest='iterative_realignment', type=int, default=0, help='Maximum number of iterations') -parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution') -parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets') -parser.add_argument('--output', dest='output', help='Output dataset') -parser.add_argument('--output_dir', dest='output_dir', help='Output dataset files_path directory') -parser.add_argument('--output_dataset_collection', dest='output_dataset_collection', default=None, help='Output additional dataset collection of files.') -parser.add_argument('--pasta_iter_limit', dest='pasta_iter_limit', type=int, default=None, help='Maximum number of iteration that the PASTA algorithm will execute') -parser.add_argument('--pasta_script_path', dest='pasta_script_path', default=None, help='Path to script for executing pasta') -parser.add_argument('--remove_sequences', dest='remove_sequences', default=0, type=float, help='Remove sequences with gaps of') - -args = parser.parse_args() - -# Build the command line. 
-cmd = 'GeneFamilyAligner' -cmd += ' --orthogroup_faa %s' % args.orthogroup_faa -cmd += ' --alignment_method %s' % args.alignment_method -if args.alignment_method == 'pasta': - if args.pasta_script_path is not None: - cmd += ' --pasta_script_path %s' % args.pasta_script_path - if args.pasta_iter_limit is not None: - cmd += ' --pasta_iter_limit %d' % args.pasta_iter_limit -cmd += ' --num_threads %d' % args.num_threads -if args.codon_alignments is not None: - cmd += ' --codon_alignments' -if args.automated_trimming is not None: - cmd += ' --automated_trimming' -if args.gap_trimming > 0: - cmd += ' --gap_trimming %4f' % args.gap_trimming -if args.remove_sequences > 0: - cmd += ' --remove_sequences %4f' % args.remove_sequences -if args.iterative_realignment > 0: - cmd += ' --iterative_realignment %d' % args.iterative_realignment - -# Run the command. -utils.run_command(cmd) - -# Handle outputs. -if args.codon_alignments is None: - src_output_dir = OUTPUT_DIR -else: - src_output_dir = os.path.join(OUTPUT_DIR, 'orthogroups_aln') -if args.output_dataset_collection is not None: - utils.move_directory_files(src_output_dir, args.output_dataset_collection, copy=True) -utils.move_directory_files(src_output_dir, args.output_dir) -utils.write_html_output(args.output, 'Aligned gene family sequences', args.output_dir)
--- a/gene_family_aligner.xml Fri Aug 25 13:01:25 2017 -0400 +++ b/gene_family_aligner.xml Mon Oct 30 09:52:00 2017 -0400 @@ -1,36 +1,30 @@ -<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2"> +<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0"> <description>aligns integrated orthologous gene family clusters</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements_gene_family_aligner" /> + <requirements> + <requirement type="package" version="1.0.3">plant_tribes_gene_family_aligner</requirement> + </requirements> <command detect_errors="exit_code"><![CDATA[ -#set input_format = $input_format_cond.input_format -#set alignment_method_cond = $input_format_cond.alignment_method_cond #set alignment_method = $alignment_method_cond.alignment_method -#if str($input_format_cond.input_format) == 'ptortho': - #set output_codon_alignments = False -#else if str($input_format_cond.input_format) == 'ptorthocs' and str($input_format_cond.codon_alignments ) == 'no': - #set output_codon_alignments = False -#else: - #set output_codon_alignments = True -#end if - -python '$__tool_directory__/gene_family_aligner.py' +#set input_dir = 'input_dir' +mkdir $input_dir && +#for $i in $input: + #set filename = $i.file_name + #set name = $i.name + ln -s $filename $input_dir/$name && +#end for +GeneFamilyAligner --alignment_method $alignment_method #if str($alignment_method) == 'pasta': --pasta_script_path '$__tool_directory__/run_pasta.py' --pasta_iter_limit $alignment_method_cond.pasta_iter_limit #end if --num_threads \${GALAXY_SLOTS:-4} -#if str($input_format) == 'ptortho': - --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path' -#else: - ## str($input_format) == 'ptorthocs' - --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path' - #if str($input_format_cond.codon_alignments) == 'yes': - --codon_alignments true - #end if 
+--orthogroup_faa '$input_dir' +#if str($codon_alignments) == 'yes': + --codon_alignments #end if #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences #if str($remove_gappy_sequences) == 'yes': @@ -40,7 +34,7 @@ --gap_trimming $trim_type_cond.gap_trimming #else: ## str($trim_type) == 'automated_trimming' - --automated_trimming true + --automated_trimming #end if #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps @@ -51,90 +45,116 @@ #if str($remove_sequences_with_gaps_cond.iterative_realignment): --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment #end if - #if $output_codon_alignments: - --output '$output_aln_filtered_ca' - --output_dir '$output_aln_filtered_ca.files_path' - #else: - --output '$output_aln_filtered' - --output_dir '$output_aln_filtered.files_path' - #end if - #else: - #if $output_codon_alignments: - --output '$output_aln_trimmed_ca' - --output_dir '$output_aln_trimmed_ca.files_path' - #else: - --output '$output_aln_trimmed' - --output_dir '$output_aln_trimmed.files_path' - #end if - #end if -#else: - #if $output_codon_alignments: - --output '$output_aln_ca' - --output_dir '$output_aln_ca.files_path' - #else: - --output '$output_aln' - --output_dir '$output_aln.files_path' #end if #end if -#if str($output_dataset_collection) == 'yes': - --output_dataset_collection dataset_collection -#end if +&>proc.log ]]></command> <inputs> - <conditional name="input_format_cond"> - <param name="input_format" type="select" label="Classified orthogroup fasta files"> - <option value="ptortho">Proteins orthogroup fasta files</option> - <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option> + <param name="input" format="fasta" type="data_collection" collection_type="list" label="Integrated orthogroup fasta files" /> + <conditional 
name="alignment_method_cond"> + <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method"> + <option value="mafft" selected="true">MAFFT</option> + <option value="pasta">PASTA</option> </param> - <when value="ptortho"> - <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files"> - <validator type="empty_extra_files_path" /> - </param> - <expand macro="cond_alignment_method" /> - </when> - <when value="ptorthocs"> - <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files"> - <validator type="empty_extra_files_path" /> - </param> - <expand macro="cond_alignment_method" /> - <expand macro="param_codon_alignments" /> + <when value="mafft" /> + <when value="pasta"> + <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" /> </when> </conditional> - <expand macro="cond_remove_gappy_sequences" /> - <param name="output_dataset_collection" type="select" display="radio" label="Output additional dataset collection of files?"> + <param name="codon_alignments" type="select" label="Codon alignments"> <option value="no" selected="true">No</option> <option value="yes">Yes</option> </param> + <conditional name="remove_gappy_sequences_cond"> + <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <conditional name="trim_type_cond"> + <param name="trim_type" type="select" label="Trimming method"> + <option value="gap_trimming" selected="true">Gap score based trimming</option> + <option value="automated_trimming">Automated heuristic trimming</option> + </param> + <when value="gap_trimming"> + <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" /> + </when> + <when 
value="automated_trimming" /> + </conditional> + <conditional name="remove_sequences_with_gaps_cond"> + <param name="remove_sequences_with_gaps" type="select" label="Remove sequences"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" /> + <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" /> + </when> + </conditional> + <param name="output_pristine_alignments" type="select" display="radio" label="Output primary and intermediate alignments?" help="In addition to trimmed/filtered alignments"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + </when> + </conditional> </inputs> <outputs> - <data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> - </data> - <data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> - </data> - <data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and 
(remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> - </data> - <data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter> - </data> - <data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> - </data> - <data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}"> - <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter> - </data> - <collection name="dataset_collection" type="list" label="${tool.name} (dataset collection) on ${on_string}"> - <discover_datasets pattern="__name__" directory="dataset_collection" format="fasta" /> - <filter>output_dataset_collection == 'yes'</filter> + <collection 
name="primary_faa" type="list" label="${tool.name} (primary orthogroup protein alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_faa" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> + </collection> + <collection name="primary_fna" type="list" label="${tool.name} (primary orthogroup codon alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_aln_fna" format="fasta" /> + <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter> + </collection> + <collection name="pristine" type="list" label="${tool.name} (intermediate alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/other_orthogroups_aln" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['output_pristine_alignments'] == 'yes'</filter> </collection> + <collection name="trimmed_faa" type="list" label="${tool.name} (trimmed orthogroup protein alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_faa" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter> + </collection> + <collection name="trimmed_fna" type="list" label="${tool.name} (trimmed orthogroup codon alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_trimmed_aln_fna" format="fasta" /> + <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no'</filter> + </collection> 
+ <collection name="filtered_faa" type="list" label="${tool.name} (filtered orthogroup protein alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_faa" format="fasta" /> + <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter> + </collection> + <collection name="filtered_fna" type="list" label="${tool.name} (filtered orthogroup codon alignments) on ${on_string}"> + <discover_datasets pattern="__name__" directory="geneFamilyAlignments_dir/orthogroups_filtered_aln_fna" format="fasta" /> + <filter>codon_alignments == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes'</filter> + </collection> + </outputs> <tests> - <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed. 
<test> + <param name="input"> + <collection type="list"> + <element name="3722.faa" value="3722.faa"/> + <element name="3722.fna" value="3722.fna"/> + <element name="38889.faa" value="38889.faa"/> + <element name="38889.fna" value="38889.fna"/> + <element name="39614.faa" value="39614.faa"/> + <element name="39614.fna" value="39614.fna"/> + </collection> + </param> + <param name="codon_alignments" value="yes"/> + <output_collection name="primary_faa" type="list"> + <element name="3722.faa.aln" file="3722.faa.aln" ftype="fasta"/> + <element name="38889.faa.aln" file="38889.faa.aln" ftype="fasta"/> + <element name="39614.faa.aln" file="39614.faa.aln" ftype="fasta"/> + </output_collection> + <output_collection name="primary_fna" type="list"> + <element name="3722.fna.aln" file="3722.fna.aln" ftype="fasta"/> + <element name="38889.fna.aln" file="38889.fna.aln" ftype="fasta"/> + <element name="39614.fna.aln" file="39614.fna.aln" ftype="fasta"/> + </output_collection> </test> - --> </tests> <help> This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary @@ -145,13 +165,13 @@ **Required options** - * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. + * **Integrated orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyIntegrator tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. 
PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments. - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations. - * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires protein and their corresponding coding sequences to be provided as input data. + * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires both protein and their corresponding coding sequence orthogroup fasta files to be present in the GeneFamilyAligner input data that was produced by the GeneFamilyIntegrator. **Other options** @@ -167,7 +187,7 @@ - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect. - * **Output additional dataset collection of files** - selecting 'Yes' will produce an additional output dataset collection whose elements are copies of the directories of files (these elements can be viewed with visualization tools). + * **Output primary and intermediate alignments** - selecting 'Yes' will produce a dataset collection of primary and intermediate alignments, the elements of which can be viewed with visualization tools, in addition to the final trimmed and/or filtered alignments dataset collection. .. _trimAl: http://trimal.cgenomics.org
--- a/macros.xml Fri Aug 25 13:01:25 2017 -0400 +++ b/macros.xml Mon Oct 30 09:52:00 2017 -0400 @@ -1,47 +1,6 @@ <?xml version='1.0' encoding='UTF-8'?> <macros> <token name="@WRAPPER_VERSION@">1.0</token> - <xml name="requirements_assembly_post_processor"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_assembly_post_processor</requirement> - </requirements> - </xml> - <xml name="requirements_gene_family_aligner"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_gene_family_aligner</requirement> - </requirements> - </xml> - <xml name="requirements_gene_family_classifier"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_gene_family_classifier</requirement> - </requirements> - </xml> - <xml name="requirements_gene_family_integrator"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_gene_family_integrator</requirement> - </requirements> - </xml> - <xml name="requirements_kaks_analysis"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_kaks_analysis</requirement> - </requirements> - </xml> - <xml name="requirements_ks_distribution"> - <requirements> - <requirement type="package" version="1.3.2">r-optparse</requirement> - </requirements> - </xml> - <xml name="requirements_gene_family_phylogeny_builder"> - <requirements> - <requirement type="package" version="1.0.2">plant_tribes_gene_family_phylogeny_builder</requirement> - </requirements> - </xml> - <xml name="param_codon_alignments"> - <param name="codon_alignments" type="select" label="Codon alignments"> - <option value="yes" selected="true">Yes</option> - <option value="no">No</option> - </param> - </xml> <xml name="param_method"> <param name="method" type="select" label="Protein clustering method"> <option value="gfam" selected="true">GFam</option> @@ -49,74 +8,12 @@ <option value="orthomcl">OrthoMCL</option> </param> </xml> - <xml name="param_options_type"> - <param 
name="options_type" type="select" label="Options Configuration"> - <option value="basic" selected="true">Basic</option> - <option value="advanced">Advanced</option> - </param> - </xml> - <xml name="param_orthogroup_fna"> - <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences"> - <option value="yes" selected="true">Yes</option> - <option value="no">No</option> - </param> - </xml> <xml name="param_scaffold"> <param name="scaffold" type="select" label="Gene family scaffold"> <options from_data_table="plant_tribes_scaffolds" /> <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." /> </param> </xml> - <xml name="param_sequence_type"> - <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)"> - <option value="protein" selected="true">Amino acid based</option> - <option value="dna">Nucleotide based</option> - </param> - </xml> - <xml name="cond_alignment_method"> - <conditional name="alignment_method_cond"> - <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method"> - <option value="mafft" selected="true">MAFFT</option> - <option value="pasta">PASTA</option> - </param> - <when value="mafft" /> - <when value="pasta"> - <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" /> - </when> - </conditional> - </xml> - <xml name="cond_remove_gappy_sequences"> - <conditional name="remove_gappy_sequences_cond"> - <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no" /> - <when value="yes"> - <conditional name="trim_type_cond"> - <param name="trim_type" type="select" label="Trimming method"> - <option 
value="gap_trimming" selected="true">Gap score based trimming</option> - <option value="automated_trimming">Automated heuristic trimming</option> - </param> - <when value="gap_trimming"> - <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" /> - </when> - <when value="automated_trimming" /> - </conditional> - <conditional name="remove_sequences_with_gaps_cond"> - <param name="remove_sequences_with_gaps" type="select" label="Remove sequences"> - <option value="no" selected="true">No</option> - <option value="yes">Yes</option> - </param> - <when value="no" /> - <when value="yes"> - <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" /> - <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" /> - </when> - </conditional> - </when> - </conditional> - </xml> <xml name="citation1"> <citation type="bibtex"> @misc{None,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3722.faa Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,161 @@ +>gnl_Glyma1.01_PACid_16266208 +MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK +RNSSEMNFGKAENSSVFDASYQNFCFGVNQLQDIKKGKGGILGGGGRSRHRSGRKQKMFYGHDV +>gnl_Glyma1.01_PACid_16266209 +MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK +RNSSEMNFGKAENSSVFDASYQNFCFGTGEPTPRYKEGKGGNSRRRR +>gnl_Glyma1.01_PACid_16266210 +MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK +RNSSEMNFGKAENSSVFDASYQNFCFGVGHVNYHYQ +>gnl_Glyma1.01_PACid_16301083 +MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK +HNSSEMNFGKAENSSVFDAGYQNFCFGVNQLQDIKKKKGGILGGGRSRHRNGRKQNMSYGHDVSSNDYPGISTK +>gnl_Glyma1.01_PACid_16301085 +MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK +HNSSEMNFGKAENSSVFDAGYQNFCFGVGHVNYHYQ +>gnl_Glyma1.01_PACid_16301084 +MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG +VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK +HNSSEMNFGKAENSSVFDAGYQNFCFGTGEPTPRYKEEKGGNSRRR +>gnl_Medtr3.5_Medtr8g022310.1 +MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG +VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNSNK +RNSTQMNFGKAEDSSSFGANYQNFCFGMKHLQEDVEKEKGGILEGGGSKKQRKGRKQKISCGHVSSNDHPGISAN +>gnl_Medtr3.5_Medtr8g022310.2 +MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG 
+VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNSNK +RNSTQMNFGKAEDSSSFGANYQNFCFGVNLVNYHYQ +>gnl_Musac1.0_GSMUA_Achr6T31040_001 +MAAEEDKSGDFYAVLGLRKECSETELRNAYKKLAMRWHPDKCLASGNAQIVGEAKEKFQEIQKAYSVLSDSNKRFLYDVG +VYDNDDDNDENGMGDFIGEMLEMMSQTKPNENSQDSFQELQELFVEMFQDDLDAGFGGSIFHDCPWAQPTNGQDCWTSSG +LHFANGRSKCGNKRGNSAVNLGKVNLEELEHGTSDFYFGLNDAAQPSQGKGGSNNKRRNGRKQKVSSNHDVSS +>gnl_Musac1.0_GSMUA_Achr9T18140_001 +MAAGEEKIGDFYTVLGLRKECSEAELRIAYKKLAMRWHPDKCSASGNHRRMEEAKEKFQEIQKAYSVLSDSSKRFLYDVG +IYDNEDDNDEKGMGDFIGEIAQMMSQTKSGENGHDSFEELQRMFLDMFQDDLDAGFGDSSIHSGPQARPTDGLNCSMPSG +LQFADGGNNGSNKRGNSEKAKLDGLENSSTGFCFGLNDAGQSSKGKGSANSKRRNGRKQKVSSKHDVSSSDAEVSF +>gnl_Musac1.0_GSMUA_Achr8T23700_001 +MASDMDASGDFYSVLGLKKECSEAELRNAYKKLALKWHPDKCSASGNEIRMKEAKQQFQEIQKAYSVLSDSNKRFLYDVG +AYDKDDDKDEEGMVEFLGEMAQMMRQTKCCGSGQESFEQLQQMFVEMFHDDLDAGFCGHSSATSGAASCGNKRDNSAMDS +GKRKPDELDPAAIGFCLGTKDAGQSSKGRGSNSKRRNRRKQKASSKHDNSSHNAKVSA +>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001 +MEGDEEKSGDFYAVLGLKKEGSMAELKNAYKKLAMKWHPDKCPASGNKIRMDKAKEKFQEIQKAYSVLSDSNKRFLYDVG +VYDKDDEEDEEGMGDFIGEIAQMMSQSKPSGSGHESLEELHRQVVEMFLDELDAGDRFSSANQGASSCDGRDDGGGNKRG +NWAVDWGKEKLNELGPGTGGFCFGVSRRVHSFDLMIDVVHLIHSDLTLE +>gnl_Orysa6.0_PACid_16843526 +MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD +SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS +QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGKSDAKQAAKTRSGNTASR +RRNGRKQKVSSKHDVSSEDEMPGSQWHGVA +>gnl_Orysa6.0_PACid_16843528 +MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD +SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS +QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGQKSDAKQAAKTRSGNTAS +RRRNGRKQKVSSKHDVSSEDEMPGSQWHGVA +>gnl_Poptr2.2_PACid_18217800 +MANGGEDKWKSNDLYQVLGLNKECTDTELRSAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDTNKRFLY 
+DVGVDDSDDDENGMGDFLNEMAVMMSQTKPSENMEESLEELQELFDEMFQEDLHSFGIDSQAAPSCPPSYVSYSESSNSN +NKRVSADMNLGKTKVDDSSSFNSHFEKFCLGTGGTAATFQEGEGGSKRRNSRRSQRQTKARQETKSFFGL +>gnl_Poptr2.2_PACid_18234651 +MENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKRFLY +DVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDKRVS +VDMNLKKTKVDDSSGFNSHVEKFCLGVSGTPAIFQEGEGSKRRSSRRNRR +>gnl_Poptr2.2_PACid_18234649 +MLRMENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKR +FLYDVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDK +RVSVDMNLKKTKVDDSSGFNSHVEKFCLGVEHQQSFKKGKGVRGGVQGGTGGRERKGRKQEVSSGYDVSSHDHGISAS +>gnl_Poptr2.2_PACid_18234650 +MENGGEEKGKSNDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASENSRFVDEAKKKFQTIQQAYSVLSDTNKRFLY +DVGVYDSEDDENGMGGFMNEMAAMMSQTKPHENVEESFEELQGLFEEMFQEDLDSFGIACQATTCVSYSESSNSNDKRVS +VDMNLKKTKVDDSSGFNSHVEKFCLGVEHQQSFKKGKGVRGGVQGGTGGRERKGRKQEVSSGYDVSSHDHGISAS +>gnl_Soltu3.4_PGSC0003DMP400016105 +MGNDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASGNSKFVDEAKKKFQAIQEAYSVLSDANKRFLYDVGVYDSGD +DDDENGMGDFLNEMAAMMSQNKSNENQEETFEELQDMFDEIFNSDNGMSSSSSSSSRTGTPSMCSTTSSTSSSETFFTFS +NKRSSGEMKSGKGDSCQFQGFCEGTGGASGKSNERERSRRKNSKSGRKQ +>gnl_Soltu3.4_PGSC0003DMP400016106 +MGNDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASGNSKFVDEAKKKFQAIQEAYSVLSDANKRFLYDVGVYDSGD +DDDENGMGDFLNEMAAMMSQNKSNENQEETFEELQDMFDEIFNSDNGMSSSSSSSSRTGTPSMCSTTSSTSSSETFFTFS +NKRSSGEMKSGKGDSCQFQGFCEGVEHLEKAMKENGVGGKIPRVDGSNRMDAKRQKVLS +>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329 +MAPRGEKDSDFYAILGLKKECSASDLRNAYKRLALRWHPDRCSASGNTKFVEECKKKFQAIQQAYSVLSDANKRFLYDVG +AYGSDDDDQGMGEFLGEMAVMMSQTKPSEKGPESFEDLQNLFQEMFERDLDMFKSSTSHNNNNDNNNNNHRSSDNNNCSS +VHCFSNTNKRNCSDMNAGEASEVGRFAFSCYATEFLHKQTFSVGADDVRSESSNKRRNGRKQKSTSSSRKS +>gnl_Aquco1.0_PACid_18141277 +MASEEEASDFYKVLGLKNNCSSLELRNAYKKLALKWHPDRCAASGNSKFVEEAKKNFQAIQEAYSVLSDEQKRFMYDVGV +YDKDDDDENEDMGDFLGEMMSMMKQENTSADGQQSFEDLQNLFQEMVQNDKEFYNPASQNSSIYNASNNMFSFSNNENLN 
+NASNNTFSSFYNENLNSSNKKSCSSMSAENTKVDFNMESLDFRSFSIGLEGGTSFQNSKGRGVTGRRTGRKQKGSSCNDM +SSHDSKILA +>gnl_Arath10_AT3G14200.1 +MASSNSEKINENLYAVLGLKKECSKTELRSAYKKLALRWHPDRCSSMEFVEEAKKKFQAIQEAYSVLSDSNKRFLYDVGA +YNTDDDDDQNGMGDFLNEMATMMNQSKPSDNNTGDSFEQLQDLFNEMFQGDAAAFPSSSSCSTSNFTSSRSFVFDTNSQR +SSSFATSSMGMNNDPFGYDPRAHSFSLGVDHQQEFKKGKNNGGRRNRRKNNVPSAGHETSSSNNYGVPTS +>gnl_Bradi1.2_Bradi3g60090.1 +MATGGDKCGGKPAAAGVGGGDLYSVLGVNKECSDADLKVAYRKLAMRWHPDRCSSSSSTKHMEEAKEKFQEIQGAYSVLS +DANKRFLYDVGVYEEHEEEDDDTLQGMGDFLGEMAHMMSQTQPARQESFEELQQLFVDMFQSDIESGFCNGPAKDHDPVQ +RQTRTFSTPPSPSPSPPPPLATVDEAASCNGINKRGSSAMGSGKPPRAGEVSGGHGQSEFCFGMSDAKQAPKARGGNASR +RRNGQKQKLSSKHDVSSGDEMPRPHAAV +>gnl_Carpa1.181_PACid_16420351 +MADGEDKNNSDLYAVLGLNKECTPAELRNAYKKLAMRWHPDRCSASGNSMFVEEAKKKFQAIQEAYSVLSDANKRFLYDV +GAYESDDDENGMGDFLNEMAAMMSQTKPNENGNAQESFEELQELFQEMFQGDMGFNTFGSSSQPTTSSCSASSAYATCSE +TSNPNNNKRNSSEMNYGKKKVDDSSGFHAHFQTFCLGVEQQQDFKKGKEARGGIRGKPGGSRRQGRKQKVSSRHNVSSND +LGISAS +>gnl_Frave2.0_gene05408 +MAGGKWVPPPLSQFHLHIKRFRRRQKVSGSSGNTEPSGYTDCKNISNRRMEEKGNDFYAVMGLKKECSDSELRNAYKKLA +LIWHPDRCSASGNSKFVEEAKKKFQDIQQAYSVLSDANKRFLYDVGAYESDDDENGMGDFLNEMAVMMSQTKPNENGGES +FEQLQELFEEMFQGDIEGFSSCSQPPTSCSTSSSSYALYCENSTPSNKRNSSAMNYGNATLDSSGFDAHFHNFCVGTGGK +PAKDREGDARKRKDSRRSNR +>gnl_Mimgu1.0_PACid_17694730 +MAADEEKSSDFYGVLGLRKECTAAELRVAYKKLAMKWHPDRCSASGNLKYVEEAKNKFQAVQQAYSVLSDANKRFLYDVG +IYDSEDDADENGMGDFLNEMVAMMGQSKPNENKNESFQELQDLFEEIFNNDAEEVFKIPPPHFPYQDSCSETRTASNKRN +AREMGSVNFSNIEATPFEGFCIGENVIFGGERIQTRPGGGSRRTKPKISTSIDGLIS +>gnl_Nelnu1.0_NNU_010544-RA +MNLLLQKWHPDRCSSSGNSKFVEDSKKKFQAIQEAYSVLSDENKRFLYDVGVYDCDDDDDDENGMGEFLGEMATMMSQIK +PSENGPESLEKLQELFEEMFQRDMDDGFFSPSPQCASFSSSCSSSSSSTTYFSYNNNKHDNKRNCSDISSMDDFYTFGTD +SIQFSNFCIGVEGGEDSKVRGGKSRRKSNRRQKVSSSKHDPSCR +>gnl_Solly2.3_Solyc03g123560.2.1 +MEDKSNDYYAVLGLKKECTDTELRNAYKKLALKWHPDRCSASGNLKFVDEAKKQFQAIQEAYSVLSDANKKFLYDVGVYD +SGDDDDENGMGDFLNEMAAMMSQNKSNENQGEETFEELQDMFNEMFNSDNGTFSSSSSSSSSWTGTPSMCSTTSSTSSSE +TFLTFPNKRSSGEMKSGSSVRGDSCQFQGFCVGAGGTSGKCNERERSWRKNSKSGRKH 
+>gnl_Sorbi1.4_PACid_1968370 +MDAGGEKFSDAAAAEGGEGGGDLYAVLGLKKECSDADLKVAYRKLAKKWHPDKCSSSSSVKHMEEAKEKFQEIQGAYSVL +SDANKRLLYDVGVYDDEDDEDSMQGMGDFIGEMAQMMSQVRPTRQESFEELQQLFVDMFQSDIDSGFCNGSAKDQVQGQA +KSRTCSTSPSSSPSPPPPPTIVKEAEVSSCNGFNKRGSSAMDSGKPPRPVEGGAGQAGFCFGVSDTKQTPKPRGPNTSRR +RNGRKQKLSSKHDVSSEDETAGS +>gnl_Thepa2.0_Tp3g12470 +MASNNSEKGNDDLYGVLGLKKECTTTELRTAYKKLALRWHPDRCSSMGTPEFVDEAKKKFQAIQEAYSVLSDSNKRFLYD +VGAYNSDDEDQNGMGDFLNEMAAMMNQSKPSENNSGDSFEQLQDLFNEMFQGDAAAFSSSSSSSCSASTFTSSCSFVFDT +NSQRSPFETSSMGTNDLFGFDHSAHTFSLGVEHQQDFKKGKNSGGRRNRRKNNAQSAAHETASSNNYGVPTS +>gnl_Theca1.0_Tc06_g010450 +MANGEEKNNDFYAVLGLNKECTPTELRTAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDSNKRFLYDVG +AYDSDDDENGMGDFLNEMAGMMSQTKSNENGGESFEELQELFEEMFQADIDSFESTGQSTPSCSASSSFGSYGESSSSNK +RNSSEMSSVETRLESSSSFDAQFHSFCLGVEHRQDIKQHRGARGGMRGAAGGSRRRNGRKQKVSSGHDVTSNDCGISAS +>gnl_Vitvi12X_PACid_17827068 +MAAGEEKSNDFYAVLGLKKECTASELRNAYKRLALMWHPDRCSSSGNSKFVEEAKKKFQAIQEAYSVLSDANKRFLYDVG +AYDSDDDENGMGDFLNEMAVMMSQTKSNENGKESFEELQELFEDMFQRDVDAFNSASHHPMNSFPSSTSTSSYCESSNAN +NKRNSAEMGSGRMMSAGESSAFDAHFQSFCFGTGGTPGRFQEGERSKRRNSRRSQR +>gnl_Selmo1.0_PACid_15401289 +MEKRKEDPYTVLGVQKSSSSSEIRSAYRKLAMKWHPDKQHSLEDQAKAKFQGIQEAYSVLSDDKKRVLYDSGLYDEGDDE +VS +>gnl_Orysa6.0_PACid_16864430 +MARGGGGGGGADADLYAVLGLSRECTDADLRLAYRKLAMIWHPDRCSVAGGSASAAGVDEAKERFQEIQGAYSVLSDSNK +RFLYDVGVYDGNDGDDDDDEADLSGMGDFLGEMAQMMSQATPAESFEELQQLFVDMFQDDIDAGLCQSTPPPPSWPSPPA +AANARSPAAAATSRKGVNKRCSPAAMDMDSGLSSLLGISGFCFEAPWTSQDASTAAGGGGGKRRKQRPPPASHNV +>gnl_Sorbi1.4_PACid_1982925 +MAATSHCGNIQDQDEEASAPGAADLYAVLGLNRECTDAELRVAYRRLAMIWHPDRCSASGSSPARMEEAKERFQEIQGAY +SVLSDSNKRLLYDVGVYDSDDDEADLSGMGDFLGEMADMMSQATPTETFEELQQVFVDMFQDDLDDAGFFGGLPTTGRRA +QAPSTSLPPSVSSSPLRPTPAAGRSKGPQATPSSSFKGVERRGSTSTAKRPRPNGSAGLESDLGLSGFCFMVSKEMSKSK +ERQAVWASDDGDRSTDGKQRLSTSRDVSGGGMSRSLQGQSSKNLLQCMASKS +>gnl_Medtr3.5_Medtr8g022310.3 +MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG +VYDSDDDENVRHLFHTIHELGTLFCVMFCFFISLRGEKRSNLNLTFSLSH +>gnl_Nelnu1.0_NNU_000115-RA 
+MEVDSHRSSPSYYTILGVDQNSSASEIRNAYRKLAMQWHPDKWTKTPSLLEKAKSKFQQIQEAYSGGLLVFMLSDQGKRT +LYDVGLYDPDDETNDEVGLRRFHAGDDISHERCEETGEEIQLGGTTGDVSGNVTRAGVEDGECWWCVVVRWSCSLKEELK +EGQMGIISESDDAGHDTPSLPHLHGSELELLGRTGCCN +>contig_7 +ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG +NESGEISGKKNTRKGKGDX
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3722.faa.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,312 @@ +>gnl_Glyma1.01_PACid_16266208 +---MANE-------------------------------------------------GKKS +NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG +--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL------------- +----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM +NFGK---AENSSVFDASY------QNFCFG-VN--QLQDIKKGK---GGILGGGGRSRHR +SGRKQKMFY-GHDV-------------------------- +>gnl_Glyma1.01_PACid_16266209 +---MANE-------------------------------------------------GKKS +NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG +--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL------------- +----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM +NFGK---AENSSVFDASY------QNFCFG-TG--EPTPRYKEG--------KGGNSRRR +R--------------------------------------- +>gnl_Glyma1.01_PACid_16266210 +---MANE-------------------------------------------------GKKS +NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG +--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL------------- +----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM +NFGK---AENSSVFDASY------QNFCFG-VG--HVNYHYQ------------------ +---------------------------------------- +>gnl_Glyma1.01_PACid_16301083 +---MADE-------------------------------------------------GNKS +NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG +--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL------------- +----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM +NFGK---AENSSVFDAGY------QNFCFG-VN--QLQDIKKKK---GGIL-GGGRSRHR +NGRKQNMSY-GHDVSSNDYPGISTK--------------- +>gnl_Glyma1.01_PACid_16301085 +---MADE-------------------------------------------------GNKS 
+NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG +--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL------------- +----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM +NFGK---AENSSVFDAGY------QNFCFG-VG--HVNYHYQ------------------ +---------------------------------------- +>gnl_Glyma1.01_PACid_16301084 +---MADE-------------------------------------------------GNKS +NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY +S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG +--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL------------- +----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM +NFGK---AENSSVFDAGY------QNFCFG-TG--EPTPRYKEE--------KGGNSRRR +---------------------------------------- +>gnl_Medtr3.5_Medtr8g022310.1 +---MANE-------------------------------------------------GNKS +NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY +S------VLSDSNKRLMYDVGVYDS--------DDDENGMGDFLNEMVTMMSQTKSNENG +--EESFEELQ------QLFDDMFQADI--GL----------NGSTSL------------- +----NASGCSTSST------------------FMTFSESSN---------SNKRNST-QM +NFGK---AEDSSSFGANY------QNFCFG-MK--HLQEDVEKE--KGGILEGGGSKKQR +KGRKQKISC-GH-VSSNDHPGISAN--------------- +>gnl_Medtr3.5_Medtr8g022310.2 +---MANE-------------------------------------------------GNKS +NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY +S------VLSDSNKRLMYDVGVYDS--------DDDENGMGDFLNEMVTMMSQTKSNENG +--EESFEELQ------QLFDDMFQADI--GL----------NGSTSL------------- +----NASGCSTSST------------------FMTFSESSN---------SNKRNST-QM +NFGK---AEDSSSFGANY------QNFCFG-VN--LVNYHYQ------------------ +---------------------------------------- +>gnl_Musac1.0_GSMUA_Achr6T31040_001 +---MAAE-------------------------------------------------EDKS +GDFYAVLGLRKECSETELRNAYKKLAMRWHPDKCLASG---NAQIVGEAKEKFQEIQKAY +S------VLSDSNKRFLYDVGVYDND------DDNDENGMGDFIGEMLEMMSQTKPNENS +--QDSFQELQ------ELFVEMFQDDL--DAG---------FGGSIFH-DCPWAQP---- 
+---TNGQDCWTSSG------------------LHFANGRSK--------CGNKRGNS-AV +NLGK----VNLEELEHGT------SDFYFG-LN--DAAQPSQGKGGS--------NNKRR +NGRKQKVSS-NHDVSS------------------------ +>gnl_Musac1.0_GSMUA_Achr9T18140_001 +---MAAG-------------------------------------------------EEKI +GDFYTVLGLRKECSEAELRIAYKKLAMRWHPDKCSASG---NHRRMEEAKEKFQEIQKAY +S------VLSDSSKRFLYDVGIYDNE------DDNDEKGMGDFIGEIAQMMSQTKSGENG +--HDSFEELQ------RMFLDMFQDDL--DAG---------FGDSSIH-SGPQARP---- +---TDGLNCSMPSG------------------LQFADGGNN--------GSNKRGNS-EK +--------AKLDGLENSS------TGFCFG-LN--DAGQSSKGKGSA--------NSKRR +NGRKQKVSS-KHDVSSSD-AEVSF---------------- +>gnl_Musac1.0_GSMUA_Achr8T23700_001 +---MASD-------------------------------------------------MDAS +GDFYSVLGLKKECSEAELRNAYKKLALKWHPDKCSASG---NEIRMKEAKQQFQEIQKAY +S------VLSDSNKRFLYDVGAYDKD------DDKDEEGMVEFLGEMAQMMRQTKCCGSG +--QESFEQLQ------QMFVEMFHDDL--DAG---------F------------------ +--------CGHSSA---------------------TSGAAS--------CGNKRDNS-AM +DSGK----RKPDELDPAA------IGFCLG-TK--DAGQSSKGRGS---------NSKRR +NRRKQKASS-KHDNSSHN-AKVSA---------------- +>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001 +---MEGD-------------------------------------------------EEKS +GDFYAVLGLKKEGSMAELKNAYKKLAMKWHPDKCPASG---NKIRMDKAKEKFQEIQKAY +S------VLSDSNKRFLYDVGVYDKD------DEEDEEGMGDFIGEIAQMMSQSKPSGSG +--HESLEELH------RQVVEMFLDEL--DAGD-------RFSSANQ------------- +----GASSCDGRDD----------------------------------GGGNKRGNW-AV +DWGK----EKLNELGPGT------GGFCFG-VS------------------------RRV +HSFDLMIDV-VHLIHSDL-------------------TLE +>gnl_Orysa6.0_PACid_16843526 +---MADGG-------------------------------------EKC-RDAAG-EGGGG +GDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSS---SAKHMEEAKEKFQEIQGAY +S------VLSDSNKRFLYDVGVYDDDD----NDDDNLQGMGDFIGEMAQMMSQARPTR-- +--QESFKELQ------QLFVDMFQADL--DSG---------FCNGPSKCYHTQAQSQTRT +SSTSPSMSPSPPPP------------------VATEAESPS------CNGINKRGSS-AM +DSGK-PPRASEVGSGQSQ------SGFCFG-KS--DAKQAAKTRSGNT-------ASRRR +NGRKQKVSS-KHDVSSEDEMPGSQW-----------HGVA 
+>gnl_Orysa6.0_PACid_16843528 +---MADGG-------------------------------------EKC-RDAAG-EGGGG +GDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSS---SAKHMEEAKEKFQEIQGAY +S------VLSDSNKRFLYDVGVYDDDD----NDDDNLQGMGDFIGEMAQMMSQARPTR-- +--QESFKELQ------QLFVDMFQADL--DSG---------FCNGPSKCYHTQAQSQTRT +SSTSPSMSPSPPPP------------------VATEAESPS------CNGINKRGSS-AM +DSGK-PPRASEVGSGQSQ------SGFCFGQKS--DAKQAAKTRSGNT-------ASRRR +NGRKQKVSS-KHDVSSEDEMPGSQW-----------HGVA +>gnl_Poptr2.2_PACid_18217800 +---MANGGE----------------------------------------------DKWKS +NDLYQVLGLNKECTDTELRSAYKKLALRWHPDRCSASG---NSKFVEEAKKKFQAIQQAY +S------VLSDTNKRFLYDVGVDDS--------DDDENGMGDFLNEMAVMMSQTKPSENM +--EESLEELQ------ELFDEMFQEDL--HS----------FGIDSQ------------- +----AAPSC--PPS------------------YVSYSESSN--------SNNKRVSA-DM +NLGK-TKVDDSSSFNSHF------EKFCLG-T-----------GGTAATFQEGEGGSKRR +NSRRSQRQT-KARQETKSFFGL------------------ +>gnl_Poptr2.2_PACid_18234651 +---MENGGE----------------------------------------------EKGKS +NDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASE---NSRFVDEAKKKFQTIQQAY +S------VLSDTNKRFLYDVGVYDS--------EDDENGMGGFMNEMAAMMSQTKPHENV +--EESFEELQ------GLFEEMFQEDL--DS----------FGIACQ------------- +----ATT-------------------------CVSYSESSN--------SNDKRVSV-DM +NLKK-TKVDDSSGFNSHV------EKFCLG-V-----------SGTPAIFQEGE-GSKRR +SSRRNRR--------------------------------- +>gnl_Poptr2.2_PACid_18234649 +MLRMENGGE----------------------------------------------EKGKS +NDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASE---NSRFVDEAKKKFQTIQQAY +S------VLSDTNKRFLYDVGVYDS--------EDDENGMGGFMNEMAAMMSQTKPHENV +--EESFEELQ------GLFEEMFQEDL--DS----------FGIACQ------------- +----ATT-------------------------CVSYSESSN--------SNDKRVSV-DM +NLKK-TKVDDSSGFNSHV------EKFCLG-VE--HQQSFKKGKGVRGGVQGGT-GGRER +KGRKQEVSS-GYDVSSHD-HGISAS--------------- +>gnl_Poptr2.2_PACid_18234650 +---MENGGE----------------------------------------------EKGKS +NDFYQVLGLNKDCTATELRNAYKKLALKWHPDRCSASE---NSRFVDEAKKKFQTIQQAY +S------VLSDTNKRFLYDVGVYDS--------EDDENGMGGFMNEMAAMMSQTKPHENV 
+--EESFEELQ------GLFEEMFQEDL--DS----------FGIACQ------------- +----ATT-------------------------CVSYSESSN--------SNDKRVSV-DM +NLKK-TKVDDSSGFNSHV------EKFCLG-VE--HQQSFKKGKGVRGGVQGGT-GGRER +KGRKQEVSS-GYDVSSHD-HGISAS--------------- +>gnl_Soltu3.4_PGSC0003DMP400016105 +---M-------------------------------------------------------G +NDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASG---NSKFVDEAKKKFQAIQEAY +S------VLSDANKRFLYDVGVYDSG------DDDDENGMGDFLNEMAAMMSQNKSNENQ +--EETFEELQ------DMFDEIFNSDN--GM------------SSSS------------- +----SSSSRTGTPS------------------MCSTTSSTSSSET-FFTFSNKRSSG-EM +KSGK--------GDSCQF------QGFCEG-T-----------GGASGKSNERE-RSRRK +NSKSGRKQ-------------------------------- +>gnl_Soltu3.4_PGSC0003DMP400016106 +---M-------------------------------------------------------G +NDYYAVLGLKKECTETELRNAYKKLALKWHPDRCSASG---NSKFVDEAKKKFQAIQEAY +S------VLSDANKRFLYDVGVYDSG------DDDDENGMGDFLNEMAAMMSQNKSNENQ +--EETFEELQ------DMFDEIFNSDN--GM------------SSSS------------- +----SSSSRTGTPS------------------MCSTTSSTSSSET-FFTFSNKRSSG-EM +KSGK--------GDSCQF------QGFCEG-VE--HLEKAMKENGVGGKIPRVD-GSNRM +DAKRQKVLS------------------------------- +>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329 +---MAPR-------------------------------------------------GEKD +SDFYAILGLKKECSASDLRNAYKRLALRWHPDRCSASG---NTKFVEECKKKFQAIQQAY +S------VLSDANKRFLYDVGAYGS--------DDDDQGMGEFLGEMAVMMSQTKPSEKG +--PESFEDLQ------NLFQEMFERDL--DM----------FKSSTSH------------ +----NNNNDNNNNN------------------HRSSDNNNCSSVH-CFSNTNKRNCS-DM +NAGE-ASEVGRFAFSCYATEFLHKQTFSVG-AD------DVRSES----------SNKRR +NGRKQKSTS-SSRKS------------------------- +>gnl_Aquco1.0_PACid_18141277 +---MAS--------------------------------------------------EEEA +SDFYKVLGLKNNCSSLELRNAYKKLALKWHPDRCAASG---NSKFVEEAKKNFQAIQEAY +S------VLSDEQKRFMYDVGVYDKD------DDDENEDMGDFLGEMMSMMKQENTSADG +--QQSFEDLQ------NLFQEMVQNDK--EF----------YNPASQ------------- +----NSSIYNASNNMFSFSNNENLNNASNNTFSSFYNENLN--------SSNKKSCS-SM +SAEN--TKVDFNMESLDF------RSFSIG-LE--GGTSFQNSKGRG--------VTGRR 
+TGRKQKGSS-CNDMSSHD-SKILA---------------- +>gnl_Arath10_AT3G14200.1 +---MASSN-----------------------------------------------SEKIN +ENLYAVLGLKKECSKTELRSAYKKLALRWHPDRCSS------MEFVEEAKKKFQAIQEAY +S------VLSDSNKRFLYDVGAYNTD------DDDDQNGMGDFLNEMATMMNQSKPSDNN +-TGDSFEQLQ------DLFNEMFQGDA--AA----------FPSS--------------- +------SSCSTSNF------------------TSSRSFVFD--------TNSQRSSSFAT +SSMG--MNNDPFGYDPRA------HSFSLG-VD--HQQEFKKGKN----------NGGRR +NRRKNNVPSAGHETSSSNNYGVPTS--------------- +>gnl_Bradi1.2_Bradi3g60090.1 +---MATGG-------------------------------------DKCGGKPAA-AGVGG +GDLYSVLGVNKECSDADLKVAYRKLAMRWHPDRCSSSS---STKHMEEAKEKFQEIQGAY +S------VLSDANKRFLYDVGVYEEHEE---EDDDTLQGMGDFLGEMAHMMSQTQPAR-- +--QESFEELQ------QLFVDMFQSDI--ESG---------FCNGPAK-DHDPVQRQTRT +FSTPPSPSPSPPPP------------------LATVDEAAS------CNGINKRGSS-AM +GSGK-PPRAGEVSGGHGQ------SEFCFG-MS--DAKQAPKARGGN--------ASRRR +NGQKQKLSS-KHDVSSGDEMPRP-------------HAAV +>gnl_Carpa1.181_PACid_16420351 +---MADG------------------------------------------------EDKNN +SDLYAVLGLNKECTPAELRNAYKKLAMRWHPDRCSASG---NSMFVEEAKKKFQAIQEAY +S------VLSDANKRFLYDVGAYES--------DDDENGMGDFLNEMAAMMSQTKPNENG +NAQESFEELQ------ELFQEMFQGDMGFNT----------FGSSSQP------------ +----TTSSCSASSA------------------YATCSETSN-------PNNNKRNSS-EM +NYGK-KKVDDSSGFHAHF------QTFCLG-VE--QQQDFKKGKEARGGIRGKP-GGSRR +QGRKQKVSS-RHNVSSND-LGISAS--------------- +>gnl_Frave2.0_gene05408 +---MAGGKWVPPPLSQFHLHIKRFRRRQKVSGSSGNTEPSGYTDCKNISNR---RMEEKG +NDFYAVMGLKKECSDSELRNAYKKLALIWHPDRCSASG---NSKFVEEAKKKFQDIQQAY +S------VLSDANKRFLYDVGAYES--------DDDENGMGDFLNEMAVMMSQTKPNENG +--GESFEQLQ------ELFEEMFQGDI--EG----------FSSCSQP------------ +----PTSCSTSSSS------------------YALYCENST--------PSNKRNSS-AM +NYGN--ATLDSSGFDAHF------HNFCVG-T-----------GGKPAKDREGD-ARKRK +DSRRSNR--------------------------------- +>gnl_Mimgu1.0_PACid_17694730 +---MAAD-------------------------------------------------EEKS +SDFYGVLGLRKECTAAELRVAYKKLAMKWHPDRCSASG---NLKYVEEAKNKFQAVQQAY 
+S------VLSDANKRFLYDVGIYDSE------DDADENGMGDFLNEMVAMMGQSKPNENK +--NESFQELQ------DLFEEIFNNDA--EEV---------FKIPPPH------------ +--FPYQDSCSETRT-----------------------------------ASNKRNAR-EM +GSVN-----FSNIEATPF------EGFCIG-ENVIFGGERIQTRPG---------GGSRR +T--KPKIST-SID-----------------------GLIS +>gnl_Nelnu1.0_NNU_010544-RA +------------------------------------------------------------ +---------------------MNLLLQKWHPDRCSSSG---NSKFVEDSKKKFQAIQEAY +S------VLSDENKRFLYDVGVYDCDD-----DDDDENGMGEFLGEMATMMSQIKPSENG +--PESLEKLQ------ELFEEMFQRDM--DDG---------FFSPSPQCA---------- +---SFSSSCSSSSS---------------STTYFSYNNNKH---------DNKRNCS-DI +SSMD--DFYTFGTDSIQF------SNFCIG-VE--GGE-DSKVRGGK---------SRRK +SNRRQKVSSSKHDPSCR----------------------- +>gnl_Solly2.3_Solyc03g123560.2.1 +---M----------------------------------------------------EDKS +NDYYAVLGLKKECTDTELRNAYKKLALKWHPDRCSASG---NLKFVDEAKKQFQAIQEAY +S------VLSDANKKFLYDVGVYDSG------DDDDENGMGDFLNEMAAMMSQNKSNENQ +-GEETFEELQ------DMFNEMFNSDN--GT----------FSSSSS------------- +----SSSSWTGTPS------------------MCSTTSSTSSSET-FLTFPNKRSSG-EM +KSGS-----SVRGDSCQF------QGFCVG-A-----------GGTSGKCNERE-RSWRK +NSKSGRKH-------------------------------- +>gnl_Sorbi1.4_PACid_1968370 +---MDAGG-------------------------------------EKFSDAAAAEGGEGG +GDLYAVLGLKKECSDADLKVAYRKLAKKWHPDKCSSSS---SVKHMEEAKEKFQEIQGAY +S------VLSDANKRLLYDVGVYDDED-----DEDSMQGMGDFIGEMAQMMSQVRPTR-- +--QESFEELQ------QLFVDMFQSDI--DSG---------FCNGSAK-DQVQGQAKSRT +CSTSPSSSPSPPPP----------------PTIVKEAEVSS------CNGFNKRGSS-AM +DSGK-PPRP--VEGGAGQ------AGFCFG-VS--DTKQTPKPRGPN--------TSRRR +NGRKQKLSS-KHDVSSEDETAGS----------------- +>gnl_Thepa2.0_Tp3g12470 +---MASNN-----------------------------------------------SEKGN +DDLYGVLGLKKECTTTELRTAYKKLALRWHPDRCSSMG---TPEFVDEAKKKFQAIQEAY +S------VLSDSNKRFLYDVGAYNS-------DDEDQNGMGDFLNEMAAMMNQSKPSENN +-SGDSFEQLQ------DLFNEMFQGDA--AA----------FSSSSS------------- +------SSCSASTF------------------TSSCSFVFD--------TNSQRSPF-ET 
+SSMG---TNDLFGFDHSA------HTFSLG-VE--HQQDFKKGKN----------SGGRR +NRRKNNAQSAAHETASSNNYGVPTS--------------- +>gnl_Theca1.0_Tc06_g010450 +---MANG-------------------------------------------------EEKN +NDFYAVLGLNKECTPTELRTAYKKLALRWHPDRCSASG---NSKFVEEAKKKFQAIQQAY +S------VLSDSNKRFLYDVGAYDS--------DDDENGMGDFLNEMAGMMSQTKSNENG +--GESFEELQ------ELFEEMFQADI--DS----------FESTGQ------------- +----STPSCSASSS------------------FGSYGESSS---------SNKRNSS-EM +SSVE-TRLESSSSFDAQF------HSFCLG-VE--HRQDIKQHRGARGGMRGAAGGSRRR +NGRKQKVSS-GHDVTSND-CGISAS--------------- +>gnl_Vitvi12X_PACid_17827068 +---MAAG-------------------------------------------------EEKS +NDFYAVLGLKKECTASELRNAYKRLALMWHPDRCSSSG---NSKFVEEAKKKFQAIQEAY +S------VLSDANKRFLYDVGAYDS--------DDDENGMGDFLNEMAVMMSQTKSNENG +--KESFEELQ------ELFEDMFQRDV--DA----------FNSASHH------------ +----PMNSFPSSTS------------------TSSYCESSN--------ANNKRNSA-EM +GSGRMMSAGESSAFDAHF------QSFCFG-T-----------GGTPGRFQEGE-RSKRR +NSRRSQR--------------------------------- +>gnl_Selmo1.0_PACid_15401289 +---M----------------------------------------------------EKRK +EDPYTVLGVQKSSSSSEIRSAYRKLAMKWHPDK--------QHSLEDQAKAKFQGIQEAY +S------VLSDDKKRVLYDSGLYDEG------DDE------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------VS--------------------------- +---------------------------------------- +>gnl_Orysa6.0_PACid_16864430 +---MARGGG-----------------------------------G----------GGGAD +ADLYAVLGLSRECTDADLRLAYRKLAMIWHPDRCSVAGGSASAAGVDEAKERFQEIQGAY +S------VLSDSNKRFLYDVGVYDGNDGDDDDDEADLSGMGDFLGEMAQMMSQATP---- +--AESFEELQ------QLFVDMFQDDI--DAG---------LCQSTP------------- +----PPPSWPSPPA-------------------AANARSPAAAAT-SRKGVNKRCSP-AA +MDMD-----SGLSSLLGI------SGFCFE-AP--WTSQDASTAAGGG-------GGKRR +KQRPPPA---SHNV-------------------------- +>gnl_Sorbi1.4_PACid_1982925 +---MAATSH-----------------------------------CGNIQDQDEEASAPGA 
+ADLYAVLGLNRECTDAELRVAYRRLAMIWHPDRCSASG--SSPARMEEAKERFQEIQGAY +S------VLSDSNKRLLYDVGVYDSDD-----DEADLSGMGDFLGEMADMMSQATP---- +--TETFEELQ------QVFVDMFQDDLD-DAG---------FFGGLPT-TGRRAQA--PS +TSLPPSVSSSPLRP----------------TPAAGRSKGPQATPSSSFKGVERRGST-ST +AKRPRPNGSAGLESDLGL------SGFCFM-VS--KEMSKSKERQAV---WASD-DGDRS +TDGKQRLST-SRDVSGGG-MSRSLQGQSSKNLLQCMASKS +>gnl_Medtr3.5_Medtr8g022310.3 +---MANE-------------------------------------------------GNKS +NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY +S------VLSDSNKRLMYDVGVYDS--------DDDENVRHLF----------------- +---HTIHELG------TLFCVMFCFFISLR------------GE---------------- +---------KRSNL------------------NLTFSLSH-------------------- +------------------------------------------------------------ +---------------------------------------- +>gnl_Nelnu1.0_NNU_000115-RA +---MEVD------------------------------------------------SHRSS +PSYYTILGVDQNSSASEIRNAYRKLAMQWHPDKWTKTP-----SLLEKAKSKFQQIQEAY +SGGLLVFMLSDQGKRTLYDVGLYDPDD-----ETNDEVGLRRF--HAGDDISHERC---- +--EETGEEIQLGGTTGDVSGNVTRAGV--EDGECWWCVVVRWSCSLKE------------ +----------------------------------------------------------EL +KEGQ--------------------MGIISE-SD--DA----------------------- +----------GHDTPSLPHLHGSEL-----ELLGRTGCCN +>contig_7 +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------ENEWSGAEFLNEMAAMMTQNKSNENG +--TGTFEELQ------QLFDEMFQSDI--ES----------FNGCSSS------------ +----SNETCS---------------------------------------NSNKRNSI-ES +SSAN----------------FRPENGNESG-----------------------E-ISGKK +NTRKGKGDX-------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3722.fna Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,378 @@ +>gnl_Glyma1.01_PACid_16266208 +ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC +AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG +GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA +CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG +GGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGGTAGAAGTAGACACAGAAGTGGCA +GAAAGCAAAAAATGTTCTATGGCCATGATGTT +>gnl_Glyma1.01_PACid_16266209 +ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC +AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG +GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA +CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG +GACAGGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGG +>gnl_Glyma1.01_PACid_16266210 +ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC +AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG 
+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA +CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG +GGTCGGTCATGTAAACTATCATTACCAA +>gnl_Glyma1.01_PACid_16301083 +ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC +GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG +GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA +CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG +GGTGAACCAACTCCAAGATATAAAGAAGAAAAAGGGGGGAATTCTAGGAGGAGGTAGAAGTAGACACAGAAATGGCAGAA +AGCAAAATATGTCCTATGGCCATGATGTTTCATCGAATGACTACCCTGGAATTTCCACAAAG +>gnl_Glyma1.01_PACid_16301085 +ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC +GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG +GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA +CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG +GGTAGGTCATGTAAACTATCATTACCAA +>gnl_Glyma1.01_PACid_16301084 +ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA +GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG +CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA +GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC 
+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG +GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA +CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG +GACAGGTGAACCAACTCCAAGATATAAAGAAGAAAAAGGGGGGAATTCTAGGAGGAGG +>gnl_Medtr3.5_Medtr8g022310.1 +ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG +GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG +CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA +GTTTACGACAGTGATGATGACGAAAATGGTATGGGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATC +AAATGAAAATGGAGAGGAGAGCTTCGAGGAGTTACAACAGTTGTTTGATGATATGTTTCAAGCGGATATCGGATTAAATG +GAAGCACCTCTCTTAATGCTTCGGGTTGCTCCACTTCATCGACTTTCATGACGTTCAGTGAAAGCTCGAATTCAAATAAG +CGCAATTCCACTCAAATGAATTTTGGGAAGGCAGAGGATTCTTCTAGTTTTGGTGCAAATTACCAGAACTTCTGTTTTGG +GATGAAGCACCTTCAAGAAGATGTGGAGAAGGAAAAAGGGGGAATTCTAGAAGGAGGAGGTAGCAAAAAACAAAGAAAAG +GAAGAAAACAAAAAATTTCATGTGGACATGTTTCCTCTAATGACCATCCTGGTATTTCTGCTAAT +>gnl_Medtr3.5_Medtr8g022310.2 +ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG +GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG +CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA +GTTTACGACAGTGATGATGACGAAAATGGTATGGGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATC +AAATGAAAATGGAGAGGAGAGCTTCGAGGAGTTACAACAGTTGTTTGATGATATGTTTCAAGCGGATATCGGATTAAATG +GAAGCACCTCTCTTAATGCTTCGGGTTGCTCCACTTCATCGACTTTCATGACGTTCAGTGAAAGCTCGAATTCAAATAAG +CGCAATTCCACTCAAATGAATTTTGGGAAGGCAGAGGATTCTTCTAGTTTTGGTGCAAATTACCAGAACTTCTGTTTTGG +GGTCAATCTTGTAAATTATCATTACCAA +>gnl_Musac1.0_GSMUA_Achr6T31040_001 +ATGGCGGCCGAAGAGGACAAAAGCGGCGACTTCTACGCCGTGTTGGGGCTCAGGAAGGAGTGCTCCGAAACCGAGCTGAG +GAATGCGTACAAGAAGCTTGCCATGAGGTGGCATCCGGATAAGTGCTTGGCTTCGGGAAATGCTCAAATCGTGGGGGAAG +CCAAGGAGAAGTTTCAGGAGATCCAGAAAGCCTACTCTGTTCTCTCAGACTCCAATAAGAGATTCCTGTACGATGTGGGA 
+GTCTACGACAACGATGATGACAATGACGAAAACGGTATGGGAGACTTTATAGGGGAGATGTTGGAGATGATGAGCCAAAC +GAAACCCAATGAGAACAGCCAAGATAGCTTCCAGGAGCTGCAGGAGCTGTTTGTGGAGATGTTCCAGGACGACCTGGATG +CCGGATTTGGTGGTTCCATCTTCCACGATTGCCCCTGGGCTCAACCGACCAATGGCCAAGATTGCTGGACTTCATCGGGA +CTGCACTTTGCTAATGGAAGGAGTAAGTGTGGCAACAAGCGGGGCAACTCAGCTGTGAACTTGGGAAAGGTCAATCTTGA +AGAGTTGGAACATGGTACCAGCGACTTCTATTTTGGGCTAAATGATGCAGCACAGCCATCACAAGGGAAAGGAGGCAGTA +ATAACAAGAGAAGGAATGGAAGAAAGCAAAAGGTTTCATCCAATCATGATGTCTCATCC +>gnl_Musac1.0_GSMUA_Achr9T18140_001 +ATGGCCGCCGGGGAGGAAAAGATCGGCGATTTCTACACGGTGCTGGGGCTCAGGAAGGAGTGCTCGGAGGCGGAGCTGAG +GATCGCGTACAAGAAGCTGGCTATGAGATGGCATCCGGATAAGTGCTCGGCTTCGGGAAACCATCGAAGGATGGAGGAAG +CGAAGGAGAAGTTCCAGGAAATCCAAAAAGCCTACTCTGTTCTCTCGGACTCCAGCAAGAGATTTCTGTATGATGTGGGA +ATCTACGATAACGAGGATGATAATGACGAAAAAGGAATGGGGGATTTTATTGGGGAGATAGCTCAGATGATGAGCCAAAC +GAAATCTGGGGAGAATGGTCATGATAGCTTTGAGGAGCTGCAGCGGATGTTCCTGGATATGTTCCAGGACGACCTGGACG +CCGGATTCGGTGATTCTTCCATCCACAGTGGCCCCCAAGCTCGGCCAACCGACGGTCTCAATTGCTCGATGCCATCAGGA +CTGCAGTTTGCTGATGGAGGGAACAATGGCAGCAACAAGAGAGGCAACTCGGAGAAGGCAAAGCTGGATGGGTTGGAAAA +CAGTTCCACTGGCTTCTGCTTCGGGTTGAATGATGCAGGGCAGTCATCAAAAGGAAAAGGAAGCGCTAATAGCAAGAGAA +GGAATGGAAGAAAGCAGAAGGTCTCATCCAAACATGATGTCTCATCCAGTGATGCTGAGGTCTCATTT +>gnl_Musac1.0_GSMUA_Achr8T23700_001 +ATGGCGAGCGACATGGATGCAAGCGGCGATTTCTACTCGGTGCTGGGGCTGAAGAAGGAGTGCTCCGAGGCGGAGCTCAG +GAATGCGTACAAGAAGCTCGCTTTGAAGTGGCATCCCGATAAGTGCTCGGCGTCGGGTAATGAGATTCGCATGAAGGAAG +CGAAGCAGCAGTTCCAGGAGATCCAGAAAGCCTACTCTGTTCTCTCCGACTCCAACAAGAGATTTCTGTACGATGTTGGA +GCCTACGACAAAGACGACGACAAAGACGAAGAGGGGATGGTGGAGTTTCTTGGGGAGATGGCGCAAATGATGAGGCAAAC +CAAATGCTGTGGGAGCGGCCAGGAGAGCTTCGAGCAGCTGCAGCAGATGTTCGTGGAGATGTTCCACGACGATCTGGACG +CGGGATTCTGCGGCCACTCCTCGGCCACCTCGGGCGCGGCGTCCTGCGGCAACAAACGGGACAACTCGGCGATGGACTCG +GGCAAGCGGAAGCCGGACGAGTTGGACCCGGCCGCCATTGGGTTCTGCCTCGGGACAAAGGATGCAGGGCAATCCTCAAA +AGGAAGAGGTAGCAACAGCAAGAGAAGGAACAGAAGAAAGCAAAAGGCATCATCCAAGCATGACAACTCATCTCACAATG +CTAAGGTCTCAGCT +>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001 
+ATGGAGGGGGACGAGGAGAAGAGCGGGGATTTCTACGCGGTGCTGGGGCTGAAGAAAGAGGGCTCCATGGCGGAGCTCAA +GAATGCGTACAAGAAGCTGGCGATGAAGTGGCATCCCGATAAGTGTCCTGCGTCAGGCAATAAGATACGCATGGATAAAG +CGAAGGAGAAGTTCCAGGAGATCCAAAAAGCCTACTCTGTTCTCTCCGACTCCAACAAGCGATTCCTGTACGATGTCGGA +GTTTACGACAAAGACGATGAGGAAGATGAAGAGGGGATGGGGGACTTCATTGGGGAGATCGCGCAAATGATGAGCCAGTC +CAAACCCAGCGGGAGCGGCCACGAGAGCTTGGAGGAGCTGCATCGGCAGGTCGTGGAGATGTTCCTCGACGAACTGGACG +CCGGAGATCGCTTCTCCTCGGCCAACCAAGGCGCGTCGTCCTGCGACGGCAGGGACGACGGCGGCGGTAACAAGCGTGGC +AACTGGGCGGTGGACTGGGGCAAGGAGAAGCTGAACGAGTTGGGCCCGGGCACCGGCGGGTTCTGCTTCGGGGTGAGTCG +CCGAGTCCACTCCTTTGATCTTATGATAGACGTAGTCCACCTCATCCATTCTGATCTGACTCTGGAA +>gnl_Orysa6.0_PACid_16843526 +ATGGCCGACGGGGGAGAGAAGTGCCGGGACGCGGCCGGCGAGGGCGGCGGCGGCGGCGACCTGTACGCCGTGCTCGGGCT +CAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTCGCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCAT +CCTCCAGCAGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCAGAC +TCAAACAAGCGGTTCCTCTACGACGTGGGGGTATATGATGATGACGACAATGACGATGACAACCTGCAGGGGATGGGGGA +CTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGGCAGGAGAGCTTTAAAGAACTGCAGCAGCTAT +TCGTAGACATGTTCCAAGCTGATCTTGATTCGGGTTTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGC +CAGACTCGAACATCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCAGTAGCTACTGAGGCAGAATCGCCATC +ATGTAATGGTATTAATAAGCGTGGTTCATCAGCAATGGACTCTGGGAAGCCTCCAAGAGCCAGCGAAGTCGGTTCTGGAC +AGAGTCAATCAGGGTTTTGTTTCGGGAAGAGTGATGCTAAACAAGCGGCGAAGACGCGAAGCGGGAACACGGCCAGCCGG +AGGAGGAACGGCCGGAAGCAGAAGGTGTCGTCGAAGCACGACGTCTCGTCTGAGGACGAGATGCCAGGTTCGCAGTGGCA +CGGCGTGGCC +>gnl_Orysa6.0_PACid_16843528 +ATGGCCGACGGGGGAGAGAAGTGCCGGGACGCGGCCGGCGAGGGCGGCGGCGGCGGCGACCTGTACGCCGTGCTCGGGCT +CAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTCGCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCAT +CCTCCAGCAGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCAGAC +TCAAACAAGCGGTTCCTCTACGACGTGGGGGTATATGATGATGACGACAATGACGATGACAACCTGCAGGGGATGGGGGA +CTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGGCAGGAGAGCTTTAAAGAACTGCAGCAGCTAT 
+TCGTAGACATGTTCCAAGCTGATCTTGATTCGGGTTTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGC +CAGACTCGAACATCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCAGTAGCTACTGAGGCAGAATCGCCATC +ATGTAATGGTATTAATAAGCGTGGTTCATCAGCAATGGACTCTGGGAAGCCTCCAAGAGCCAGCGAAGTCGGTTCTGGAC +AGAGTCAATCAGGGTTTTGTTTCGGGCAGAAGAGTGATGCTAAACAAGCGGCGAAGACGCGAAGCGGGAACACGGCCAGC +CGGAGGAGGAACGGCCGGAAGCAGAAGGTGTCGTCGAAGCACGACGTCTCGTCTGAGGACGAGATGCCAGGTTCGCAGTG +GCACGGCGTGGCC +>gnl_Poptr2.2_PACid_18217800 +ATGGCAAACGGAGGAGAAGATAAATGGAAAAGCAATGACTTATATCAAGTCTTGGGGTTGAATAAGGAATGCACTGATAC +AGAGCTCAGGAGTGCTTATAAGAAACTTGCACTGAGATGGCATCCAGATCGATGTTCAGCTTCAGGAAATTCTAAGTTCG +TTGAAGAAGCCAAAAAGAAGTTTCAGGCAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC +GACGTTGGTGTTGATGACAGTGATGACGACGAAAATGGAATGGGTGATTTTCTGAATGAAATGGCTGTCATGATGAGCCA +AACGAAGCCTAGCGAAAACATGGAGGAGAGCCTAGAGGAACTGCAAGAATTATTTGACGAGATGTTCCAAGAGGATCTGC +ATTCGTTTGGGATTGACAGTCAGGCTGCTCCTTCATGTCCTCCTTCTTATGTATCCTACAGCGAAAGTTCCAACTCAAAT +AACAAACGTGTTTCTGCTGATATGAACTTGGGGAAGACTAAAGTGGATGATTCTTCTAGCTTCAACTCTCACTTTGAGAA +ATTCTGTTTAGGGACAGGTGGAACAGCAGCAACCTTTCAAGAAGGTGAAGGTGGGAGTAAGAGGAGGAATTCAAGGAGGA +GCCAGCGGCAGACGAAGGCAAGACAAGAAACAAAGAGTTTCTTCGGGCTA +>gnl_Poptr2.2_PACid_18234651 +ATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCAC +AGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATTCCAGGTTCG +TTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC +GACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCA +AACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAGAGGATTTGG +ATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAACGTGTTTCT +GTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTGTTTAGGGGT +AAGTGGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGG +>gnl_Poptr2.2_PACid_18234649 +ATGCTAAGAATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTG 
+CACTGCCACAGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATT +CCAGGTTCGTTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGG +TTTCTTTACGACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTAT +GATGAGCCAAACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAG +AGGATTTGGATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAA +CGTGTTTCTGTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTG +TTTAGGGGTGGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGGTAGAG +AGAGGAAAGGCAGGAAACAAGAAGTTTCATCTGGCTATGATGTCTCCTCCCATGACCATGGTATTTCTGCTTCA +>gnl_Poptr2.2_PACid_18234650 +ATGGAAAATGGAGGAGAAGAGAAAGGGAAAAGCAATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCAC +AGAGCTGAGGAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAAAATTCCAGGTTCG +TTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTATTCTGTTCTTTCTGACACCAACAAGAGGTTTCTTTAC +GACGTTGGTGTTTATGACAGTGAAGACGACGAAAATGGAATGGGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCA +AACAAAGCCCCACGAAAACGTGGAGGAGAGCTTTGAGGAATTGCAAGGATTGTTTGAGGAGATGTTCCAAGAGGATTTGG +ATTCGTTTGGGATTGCCTGTCAGGCTACTACCTGTGTGTCATACAGCGAAAGCTCCAACTCAAATGATAAACGTGTTTCT +GTCGATATGAACTTGAAGAAGACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTCGAGAAGTTCTGTTTAGGGGT +GGAACACCAGCAATCTTTCAAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACCGGCGGTAGAGAGAGGAAAG +GCAGGAAACAAGAAGTTTCATCTGGCTATGATGTCTCCTCCCATGACCATGGTATTTCTGCTTCA +>gnl_Soltu3.4_PGSC0003DMP400016105 +ATGGGCAATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAATGCTTATAAGAAGCT +TGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGG +CAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTAGGAGTTTATGACTCTGGTGAT +GATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCA +GGAAGAAACCTTTGAGGAATTGCAGGATATGTTTGACGAAATTTTCAATAGTGATAATGGGATGTCTTCTTCTTCTTCTT +CTTCTTCTCGGACTGGAACTCCTTCAATGTGTTCTACTACATCGTCTACATCTTCCAGTGAGACCTTTTTTACCTTTTCC 
+AACAAAAGAAGTTCAGGTGAAATGAAGTCGGGTAAAGGCGATTCTTGCCAATTCCAAGGATTTTGTGAAGGGACAGGTGG +AGCATCTGGAAAAAGCAATGAAAGAGAACGGAGTCGGAGGAAAAATTCCAAGAGTGGACGGAAGCAA +>gnl_Soltu3.4_PGSC0003DMP400016106 +ATGGGCAATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAATGCTTATAAGAAGCT +TGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGG +CAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTAGGAGTTTATGACTCTGGTGAT +GATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCA +GGAAGAAACCTTTGAGGAATTGCAGGATATGTTTGACGAAATTTTCAATAGTGATAATGGGATGTCTTCTTCTTCTTCTT +CTTCTTCTCGGACTGGAACTCCTTCAATGTGTTCTACTACATCGTCTACATCTTCCAGTGAGACCTTTTTTACCTTTTCC +AACAAAAGAAGTTCAGGTGAAATGAAGTCGGGTAAAGGCGATTCTTGCCAATTCCAAGGATTTTGTGAAGGGGTGGAGCA +TCTGGAAAAAGCAATGAAAGAGAACGGAGTCGGAGGAAAAATTCCAAGAGTGGACGGAAGCAATAGGATGGATGCTAAAA +GGCAAAAGGTTCTATCA +>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329 +ATGGCACCCCGAGGAGAGAAAGACAGTGATTTTTATGCAATTTTAGGGTTGAAGAAGGAGTGCTCTGCTTCAGATCTCAG +AAATGCGTACAAAAGGCTCGCACTTCGGTGGCATCCAGATAGGTGCTCTGCCTCAGGGAACACAAAGTTTGTGGAGGAAT +GCAAGAAAAAGTTCCAGGCCATTCAGCAGGCTTATTCCGTGCTCTCGGATGCAAATAAGAGGTTTTTGTACGATGTTGGA +GCATATGGAAGTGACGATGACGATCAGGGAATGGGTGAATTTCTTGGGGAGATGGCGGTAATGATGAGCCAGACAAAGCC +CAGTGAAAAAGGGCCGGAGAGCTTTGAGGATCTACAGAACTTGTTTCAGGAGATGTTCGAAAGGGATCTGGACATGTTTA +AGTCATCGACCTCCCACAACAACAACAATGATAACAACAATAATAATCATAGGAGTAGTGACAATAATAATTGTAGTAGT +GTTCATTGTTTTAGTAACACCAACAAGAGGAATTGCTCGGATATGAATGCCGGAGAAGCCTCGGAGGTCGGGCGCTTTGC +TTTCTCATGCTATGCGACAGAGTTCCTGCACAAGCAAACTTTCTCCGTCGGGGCGGATGATGTGCGATCGGAGTCGAGCA +ACAAGAGACGAAACGGGAGGAAACAGAAATCCACCTCCTCTTCAAGGAAAAGT +>gnl_Aquco1.0_PACid_18141277 +ATGGCTAGTGAAGAAGAAGCAAGTGATTTCTATAAAGTTTTGGGATTGAAAAATAACTGTTCTTCCTTGGAGCTCAGGAA +CGCTTATAAGAAGCTTGCACTGAAATGGCATCCGGATCGTTGTGCTGCTTCAGGAAACTCAAAGTTTGTTGAAGAAGCAA +AGAAGAATTTTCAAGCAATACAAGAAGCTTATTCTGTTCTTTCAGATGAGCAGAAACGATTTATGTATGACGTTGGTGTC +TACGATAAAGATGATGATGACGAAAATGAAGATATGGGCGATTTTTTAGGTGAAATGATGTCTATGATGAAGCAAGAAAA 
+TACTAGTGCGGATGGACAACAAAGTTTTGAAGACCTTCAAAACCTATTCCAGGAAATGGTTCAAAATGACAAAGAATTTT +ATAATCCAGCCTCTCAAAACTCGTCGATATATAATGCTAGTAACAACATGTTCTCCTTCTCTAATAACGAGAACTTAAAC +AATGCTAGCAACAACACATTCTCCTCTTTTTATAATGAGAACCTAAACAGCAGCAACAAGAAAAGTTGCTCAAGTATGAG +CGCAGAAAATACCAAGGTGGACTTTAACATGGAATCATTGGACTTCCGCAGCTTTTCTATTGGGTTAGAAGGCGGTACAT +CATTTCAAAACTCCAAAGGGAGAGGAGTAACGGGTAGGCGGACAGGAAGGAAACAGAAGGGGTCGTCCTGTAATGATATG +TCTTCCCATGATTCTAAGATTTTGGCG +>gnl_Arath10_AT3G14200.1 +ATGGCGTCCAGTAATAGCGAGAAGATCAACGAGAATCTGTACGCTGTTCTGGGTTTGAAGAAGGAATGTTCTAAGACGGA +GCTCCGTTCTGCTTATAAGAAGCTTGCTCTCAGATGGCATCCAGATCGTTGTTCGTCAATGGAGTTTGTAGAAGAAGCAA +AGAAGAAATTTCAGGCAATCCAAGAAGCCTACTCTGTTCTGTCTGACTCCAACAAGAGGTTCCTGTATGATGTTGGTGCT +TATAATACTGATGATGATGATGACCAAAACGGAATGGGAGATTTCTTGAACGAAATGGCGACTATGATGAATCAATCCAA +GCCTAGTGATAATAACACAGGGGACAGTTTTGAACAACTACAAGATCTGTTTAATGAGATGTTTCAAGGAGACGCTGCAG +CATTCCCATCATCATCGTCCTGCTCCACTTCAAATTTCACTTCATCTCGTAGTTTTGTATTCGATACAAATTCTCAGCGG +TCATCTTCGTTTGCGACAAGTTCGATGGGGATGAATAATGATCCTTTCGGATATGACCCGAGAGCTCATTCCTTCTCTTT +AGGGGTGGACCATCAGCAAGAGTTCAAGAAAGGGAAAAACAATGGCGGAAGAAGAAACAGGAGAAAGAACAATGTTCCAT +CGGCTGGTCACGAAACGTCGTCGTCAAACAACTATGGAGTCCCCACCTCA +>gnl_Bradi1.2_Bradi3g60090.1 +ATGGCCACCGGGGGCGACAAGTGCGGCGGAAAGCCGGCGGCCGCGGGGGTGGGCGGCGGTGACCTGTACTCTGTGCTGGG +CGTCAACAAGGAGTGCTCCGACGCCGACCTCAAGGTCGCCTACCGGAAGCTCGCCATGAGATGGCATCCGGATAGATGCT +CCTCCTCCAGCAGCACCAAGCACATGGAGGAAGCAAAAGAGAAGTTCCAGGAGATCCAGGGCGCCTATTCCGTCCTCTCC +GATGCCAACAAGCGCTTCCTCTATGACGTGGGGGTATATGAAGAACATGAAGAAGAAGATGATGACACTCTGCAGGGGAT +GGGGGACTTCCTTGGTGAGATGGCCCATATGATGAGCCAGACGCAGCCAGCGAGACAGGAAAGCTTTGAGGAGCTCCAGC +AGCTCTTCGTGGACATGTTCCAGTCTGATATTGAATCGGGATTCTGCAACGGACCTGCCAAGGACCATGACCCAGTCCAA +AGACAGACGCGAACATTCTCGACCCCTCCTTCGCCATCGCCATCTCCACCGCCTCCACTAGCTACAGTGGACGAAGCGGC +ATCATGTAATGGCATCAATAAGCGTGGCTCATCAGCAATGGGCTCTGGGAAGCCTCCAAGAGCTGGTGAAGTGAGTGGGG +GTCACGGCCAGTCTGAGTTCTGTTTCGGGATGAGCGACGCCAAGCAAGCGCCGAAGGCGCGAGGCGGGAACGCTAGCAGG 
+AGAAGGAACGGCCAGAAGCAGAAACTGTCGTCGAAGCACGACGTCTCCTCCGGCGATGAGATGCCGAGACCACATGCAGC +AGTA +>gnl_Carpa1.181_PACid_16420351 +ATGGCGGATGGAGAAGACAAGAACAACAGTGATTTGTATGCGGTTCTTGGATTGAATAAGGAATGTACTCCAGCAGAGCT +CAGGAACGCTTATAAGAAACTTGCAATGAGATGGCATCCAGATCGCTGTTCCGCGTCGGGGAATTCAATGTTTGTGGAAG +AAGCAAAGAAGAAATTTCAGGCAATCCAAGAAGCCTACTCTGTTCTTTCTGACGCAAACAAGAGGTTTCTGTACGACGTC +GGAGCTTACGAAAGTGATGACGACGAAAATGGAATGGGTGATTTTTTAAACGAAATGGCAGCCATGATGAGCCAAACAAA +GCCTAATGAGAATGGGAATGCACAAGAGAGCTTTGAAGAATTGCAAGAGTTGTTTCAAGAGATGTTTCAAGGGGATATGG +GATTCAACACATTTGGATCTAGTTCTCAGCCTACTACTTCTTCGTGTTCTGCTTCCTCTGCATATGCAACCTGTAGCGAA +ACCTCCAATCCTAACAACAACAAGCGCAATTCATCAGAAATGAATTATGGCAAGAAAAAGGTAGATGATTCTTCAGGGTT +TCATGCTCATTTCCAAACCTTTTGTTTAGGGGTGGAACAGCAGCAAGATTTCAAGAAGGGGAAGGAAGCAAGAGGAGGAA +TTCGAGGAAAACCCGGAGGTAGTAGGAGGCAGGGAAGGAAACAGAAGGTTTCATCTCGCCACAATGTCTCATCCAATGAC +TTGGGCATTTCTGCTTCC +>gnl_Frave2.0_gene05408 +ATGGCGGGGGGAAAGTGGGTCCCCCCACCCCTGTCCCAGTTTCATCTTCACATAAAGAGGTTCCGTCGACGTCAGAAAGT +CTCTGGATCCAGTGGAAACACAGAACCTTCTGGATACACAGACTGCAAAAATATCTCCAATCGAAGAATGGAAGAGAAAG +GCAATGACTTTTATGCTGTTATGGGGTTGAAGAAGGAATGCTCTGACTCGGAGCTCAGGAATGCTTATAAGAAACTTGCA +CTGATATGGCACCCAGATCGTTGCTCTGCCTCAGGAAATTCAAAGTTCGTGGAAGAAGCCAAGAAGAAGTTTCAGGACAT +TCAACAAGCCTATTCTGTTCTGTCCGACGCCAACAAGAGGTTTCTGTACGATGTAGGAGCTTATGAAAGTGATGATGACG +AAAATGGAATGGGTGATTTTTTAAACGAGATGGCGGTGATGATGAGCCAGACTAAGCCGAATGAAAATGGAGGAGAGAGC +TTCGAACAATTGCAGGAGCTCTTTGAAGAAATGTTTCAGGGGGATATTGAGGGCTTTAGCTCCTGCTCTCAGCCTCCTAC +TTCCTGTTCTACTTCCTCATCTTCATACGCATTGTACTGTGAAAATTCTACTCCCAGTAACAAACGTAATTCCTCCGCAA +TGAATTATGGCAACGCAACCCTGGACAGTTCTGGTTTTGATGCTCATTTTCACAATTTCTGTGTAGGGACAGGCGGGAAG +CCAGCAAAGGATCGGGAAGGGGATGCCAGGAAGAGAAAGGATTCCAGGAGGAGTAACCGG +>gnl_Mimgu1.0_PACid_17694730 +ATGGCTGCTGATGAAGAGAAAAGCAGCGATTTTTACGGCGTTCTGGGGCTGAGGAAAGAATGTACGGCGGCGGAGCTCAG +GGTTGCCTACAAGAAACTTGCAATGAAATGGCATCCAGATCGTTGCTCTGCTTCTGGGAATTTAAAGTATGTGGAGGAAG +CAAAGAACAAGTTTCAAGCTGTCCAACAGGCCTATTCTGTGCTTTCCGATGCCAACAAAAGGTTTCTCTACGACGTAGGA 
+ATCTACGATTCTGAAGACGATGCTGACGAAAACGGTATGGGTGATTTCTTGAATGAAATGGTAGCAATGATGGGCCAAAG +TAAACCAAATGAAAATAAAAACGAGAGCTTCCAAGAATTGCAAGATCTATTCGAGGAAATATTCAACAATGACGCGGAAG +AGGTTTTCAAGATTCCTCCTCCGCACTTTCCGTACCAAGATTCTTGCAGCGAGACCCGCACCGCATCGAACAAGAGGAAC +GCCCGCGAAATGGGCTCCGTAAATTTCAGTAATATCGAAGCCACACCATTTGAAGGGTTCTGCATAGGGGAAAATGTAAT +TTTTGGGGGAGAGAGAATACAAACGAGGCCCGGAGGAGGTAGTAGGAGGACGAAGCCGAAGATTTCGACATCGATCGATG +GTTTAATTAGT +>gnl_Nelnu1.0_NNU_010544-RA +ATGAACCTGTTGTTGCAGAAATGGCATCCGGATCGATGCTCCTCGTCGGGAAACTCTAAGTTCGTGGAAGATTCAAAGAA +GAAATTTCAGGCAATTCAAGAGGCTTATTCTGTTCTATCCGACGAGAATAAGCGATTTCTTTACGACGTTGGAGTTTACG +ACTGCGACGACGATGACGATGACGAAAACGGAATGGGAGAATTTTTGGGGGAAATGGCGACTATGATGAGCCAAATTAAA +CCCAGCGAGAACGGGCCGGAGAGTTTGGAGAAGCTGCAGGAACTGTTCGAGGAAATGTTCCAAAGGGACATGGATGATGG +TTTCTTCTCCCCCTCCCCCCAATGCGCTTCTTTTTCTTCGTCTTGCTCATCTTCTTCGTCGTCGACGACTTATTTTTCAT +ATAATAACAACAAGCACGACAATAAAAGGAATTGCTCCGACATCAGTTCTATGGACGATTTCTACACATTTGGCACGGAT +TCTATACAATTCAGCAATTTCTGCATTGGGGTGGAAGGAGGAGAAGATTCAAAAGTAAGAGGAGGAAAGTCAAGGCGGAA +GAGCAACAGGAGACAAAAAGTTTCATCGTCTAAACACGATCCGTCGTGCCGT +>gnl_Solly2.3_Solyc03g123560.2.1 +ATGGAAGACAAAAGCAATGATTATTATGCAGTTTTGGGGTTGAAGAAGGAATGCACTGACACAGAACTTAGGAATGCCTA +TAAGAAGCTTGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGGAATTTGAAGTTTGTAGATGAAGCAAAGAAGC +AATTTCAGGCAATTCAAGAAGCATATTCTGTGTTATCGGATGCAAACAAAAAGTTTTTGTACGATGTAGGAGTTTATGAC +TCTGGTGATGATGACGACGAAAATGGCATGGGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAA +TGAAAATCAGGGAGAAGAAACCTTTGAGGAATTGCAGGATATGTTTAATGAAATGTTCAACAGTGATAATGGAACGTTTT +CTTCTTCTTCTTCTTCTTCTTCTTCTTGGACTGGAACTCCTTCAATGTGCTCTACTACATCATCTACATCTTCAAGTGAG +ACTTTTTTAACCTTTCCCAACAAGAGAAGTTCAGGTGAAATGAAGTCGGGTAGTAGTGTAAGAGGCGATTCTTGCCAATT +CCAAGGATTTTGTGTAGGGGCAGGTGGAACTTCTGGAAAATGCAATGAAAGAGAACGAAGTTGGAGGAAAAATTCCAAGA +GTGGACGGAAGCAT +>gnl_Sorbi1.4_PACid_1968370 +ATGGACGCCGGGGGAGAGAAGTTCAGCGACGCGGCGGCGGCGGAGGGCGGTGAGGGCGGCGGCGACCTCTACGCCGTCCT +CGGGCTCAAGAAGGAGTGCTCCGACGCCGACCTCAAGGTCGCTTACCGGAAGCTCGCCAAGAAATGGCACCCGGACAAAT 
+GCTCCTCCTCCAGCAGCGTGAAACACATGGAGGAAGCCAAGGAGAAGTTCCAAGAGATCCAGGGCGCCTATTCCGTACTC +TCTGACGCCAATAAACGGCTCCTCTACGATGTTGGAGTATACGACGATGAGGACGACGAGGATAGCATGCAGGGGATGGG +TGACTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGTGCGGCCGACGAGGCAGGAAAGCTTTGAGGAGCTGCAGCAGC +TTTTTGTGGACATGTTCCAGTCTGATATTGATTCAGGATTCTGCAACGGGTCTGCTAAGGATCAAGTTCAGGGGCAAGCC +AAAAGTAGAACATGCTCGACCTCACCTTCATCATCACCGTCCCCACCTCCTCCTCCTACTATAGTAAAGGAGGCAGAGGT +GTCATCATGTAATGGCTTCAATAAGCGGGGTTCATCAGCAATGGACTCAGGGAAGCCTCCAAGGCCTGTTGAAGGCGGTG +CTGGTCAGGCTGGATTTTGTTTTGGGGTGAGCGATACGAAGCAAACGCCGAAGCCGAGAGGTCCGAACACCAGCCGGAGG +AGGAACGGCCGGAAACAGAAGCTGTCATCCAAGCACGATGTTTCATCTGAAGATGAAACGGCCGGTTCC +>gnl_Thepa2.0_Tp3g12470 +ATGGCGTCGAACAATAGCGAGAAAGGAAACGATGATTTGTATGGTGTTCTGGGCTTGAAGAAGGAATGTACGACGACGGA +GCTCCGTACTGCTTATAAGAAGCTTGCTCTAAGATGGCATCCAGATCGTTGTTCGTCAATGGGGACTCCAGAGTTTGTAG +ACGAAGCAAAGAAGAAGTTTCAGGCAATCCAAGAGGCCTATTCTGTTCTGTCTGACTCCAACAAGAGGTTCCTCTATGAT +GTTGGAGCTTATAACAGTGATGATGAAGACCAAAACGGTATGGGAGATTTCTTGAACGAAATGGCGGCAATGATGAATCA +GTCCAAGCCTAGTGAGAATAACTCAGGGGACAGTTTTGAGCAGCTACAAGATCTGTTTAATGAGATGTTTCAAGGAGACG +CTGCAGCATTCTCATCATCATCATCATCATCTTGCTCTGCTTCGACTTTTACTTCCTCTTGTAGCTTTGTCTTTGACACA +AATAGTCAGCGGTCACCGTTTGAGACAAGCTCAATGGGGACTAATGATCTTTTTGGATTCGATCACAGTGCTCACACCTT +CTCTTTAGGGGTGGAACATCAGCAGGATTTCAAGAAGGGGAAGAACAGTGGTGGAAGAAGAAACAGAAGGAAGAACAATG +CTCAATCGGCTGCTCACGAGACGGCGTCGTCCAACAACTATGGAGTCCCCACTTCA +>gnl_Theca1.0_Tc06_g010450 +ATGGCAAATGGAGAAGAGAAAAACAATGATTTTTATGCAGTTTTGGGGTTGAATAAAGAATGCACTCCGACAGAGCTCAG +GACTGCTTATAAGAAACTTGCCCTGAGATGGCACCCTGATCGTTGCTCAGCTTCGGGAAATTCAAAGTTCGTGGAAGAAG +CCAAGAAGAAATTTCAGGCCATTCAACAAGCCTATTCTGTTCTGTCTGACTCAAACAAGAGGTTTCTGTACGACGTAGGA +GCTTATGACAGTGATGATGACGAAAATGGAATGGGAGATTTTTTGAACGAAATGGCAGGGATGATGAGCCAGACAAAATC +TAATGAAAATGGAGGGGAAAGCTTCGAGGAACTACAGGAATTGTTTGAAGAAATGTTCCAAGCGGACATTGATTCATTTG +AGTCTACTGGTCAGTCCACTCCTTCCTGCTCTGCTTCATCTTCGTTTGGGTCATATGGTGAAAGTTCCAGCTCCAACAAG +CGGAATTCCTCTGAAATGAGTTCTGTGGAGACTAGGCTGGAGAGTTCTTCTAGCTTCGATGCACAATTTCACAGTTTTTG 
+TCTCGGGGTGGAACACAGGCAAGATATCAAGCAACACAGAGGAGCCAGAGGAGGAATGCGAGGAGCAGCCGGCGGTAGTA +GACGGAGAAATGGCAGGAAACAAAAGGTTTCATCTGGCCATGATGTTACTTCCAACGACTGTGGCATTTCTGCTTCA +>gnl_Vitvi12X_PACid_17827068 +ATGGCCGCCGGAGAAGAGAAGAGCAATGATTTTTATGCCGTTCTAGGGTTGAAAAAGGAATGCACCGCCTCCGAGCTCAG +AAATGCGTACAAGAGACTTGCCCTGATGTGGCACCCAGATCGTTGCTCCTCGTCGGGAAACTCGAAATTCGTGGAAGAAG +CGAAGAAGAAATTTCAGGCCATACAAGAAGCCTATTCAGTTCTCTCTGATGCGAATAAAAGGTTTCTGTACGACGTTGGA +GCCTACGACAGCGATGATGACGAAAACGGAATGGGGGATTTTTTGAATGAGATGGCGGTTATGATGAGCCAAACCAAGTC +CAATGAAAATGGGAAGGAGAGCTTTGAGGAGTTGCAGGAGCTCTTTGAGGATATGTTCCAAAGGGATGTCGACGCATTCA +ACTCTGCCTCTCATCACCCCATGAACTCTTTCCCCAGTTCTACTTCCACTTCTTCCTACTGCGAAAGCTCCAATGCCAAC +AACAAGCGGAATTCGGCTGAAATGGGCTCTGGAAGGATGATGAGTGCAGGGGAGTCCTCTGCTTTTGATGCCCACTTTCA +GAGCTTCTGCTTTGGGACAGGCGGCACGCCAGGGAGATTTCAGGAGGGGGAAAGGAGCAAGAGGAGGAATTCCAGGAGGA +GCCAACGG +>gnl_Selmo1.0_PACid_15401289 +ATGGAGAAGAGGAAAGAGGATCCCTACACTGTTCTTGGTGTCCAAAAGTCGAGTTCTAGCTCGGAAATTCGCTCCGCTTA +TCGGAAGCTCGCCATGAAATGGCATCCAGATAAGCAACACTCTTTAGAGGATCAAGCAAAAGCGAAGTTCCAGGGCATTC +AAGAAGCTTATTCAGTGCTATCCGACGACAAAAAAAGAGTTCTTTATGATTCGGGACTTTATGACGAGGGAGATGACGAG +GTGAGT +>gnl_Orysa6.0_PACid_16864430 +ATGGCCCGCGGCGGCGGCGGCGGCGGCGGCGCGGACGCCGACCTGTACGCCGTCCTCGGCCTCAGCAGGGAGTGCACCGA +CGCCGACCTCAGGCTCGCCTACCGCAAGCTCGCCATGATATGGCATCCGGACAGGTGCTCGGTGGCCGGCGGCAGCGCGA +GCGCGGCGGGCGTCGACGAGGCCAAGGAGCGATTCCAGGAGATCCAGGGCGCCTACTCCGTGCTCTCCGACTCCAACAAG +CGCTTCCTCTACGACGTCGGCGTCTACGACGGCAACGACGGCGACGACGACGACGACGAAGCAGATCTGTCGGGGATGGG +CGATTTCCTCGGCGAGATGGCGCAGATGATGAGCCAGGCGACGCCTGCGGAGAGCTTCGAGGAGTTGCAGCAGCTGTTCG +TGGACATGTTCCAGGACGACATCGACGCCGGCCTCTGCCAGTCGACGCCGCCGCCGCCGTCATGGCCGTCGCCTCCGGCG +GCCGCCAATGCACGATCGCCGGCGGCGGCGGCGACTTCACGCAAGGGCGTGAACAAGCGGTGCTCACCGGCGGCGATGGA +CATGGACTCCGGTTTGAGCAGCCTGCTGGGCATTTCGGGCTTCTGTTTCGAGGCGCCATGGACGTCGCAGGACGCGAGCA +CTGCCGCCGGCGGTGGCGGCGGCAAGAGGAGAAAGCAGAGGCCGCCGCCGGCGAGCCACAACGTG +>gnl_Sorbi1.4_PACid_1982925 
+ATGGCTGCTACAAGTCACTGCGGCAACATCCAGGACCAGGACGAAGAAGCTTCGGCTCCTGGCGCCGCCGACCTCTACGC +CGTGCTCGGGCTCAACAGGGAGTGCACCGACGCCGAGCTCAGGGTCGCGTACCGGCGGCTCGCCATGATATGGCATCCGG +ACAGGTGCTCGGCGTCCGGCAGCTCGCCGGCGCGCATGGAGGAGGCCAAGGAGCGGTTCCAGGAGATCCAGGGCGCCTAC +TCCGTGCTCTCCGACTCCAACAAGCGGCTCCTCTACGACGTCGGCGTCTACGACAGCGACGACGACGAGGCTGACCTGTC +GGGGATGGGCGACTTCCTCGGAGAGATGGCCGACATGATGAGCCAGGCCACGCCAACGGAGACCTTCGAGGAGCTGCAGC +AGGTGTTCGTGGACATGTTCCAGGACGACCTGGACGACGCCGGCTTCTTCGGCGGGCTTCCGACGACGGGCCGCAGGGCC +CAGGCACCCAGCACCTCGCTGCCGCCGTCGGTGTCGTCGTCGCCGTTGCGGCCGACGCCTGCCGCTGGAAGAAGCAAGGG +TCCGCAAGCGACGCCGTCGTCGTCGTTTAAAGGCGTCGAGAGGCGGGGTTCGACGTCGACGGCGAAACGGCCGAGGCCCA +ACGGGTCGGCGGGCCTGGAATCGGACCTGGGCCTCTCCGGATTCTGCTTCATGGTGAGTAAGGAGATGAGCAAGTCGAAG +GAGAGGCAAGCGGTATGGGCCAGTGACGACGGTGACAGGAGCACCGATGGCAAGCAGAGGTTGTCGACGAGCCGCGATGT +CTCCGGTGGTGGGATGTCACGCTCACTGCAGGGCCAAAGCAGCAAAAACTTGTTGCAGTGTATGGCCTCTAAGTCT +>gnl_Medtr3.5_Medtr8g022310.3 +ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG +GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG +CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA +GTTTACGACAGTGATGATGACGAAAATGTAAGGCACTTGTTTCACACCATTCATGAGTTGGGGACCCTCTTTTGCGTTAT +GTTTTGTTTCTTCATTTCCTTGAGGGGAGAGAAGAGAAGCAACCTTAATTTAACCTTTTCACTTTCACAT +>gnl_Nelnu1.0_NNU_000115-RA +ATGGAGGTGGACTCCCATCGATCATCTCCATCTTACTACACCATCCTTGGTGTAGATCAGAATTCCTCCGCTTCCGAGAT +ACGCAATGCTTACAGGAAGCTCGCGATGCAATGGCATCCAGACAAATGGACGAAAACTCCGTCGCTCTTAGAGAAAGCCA +AGAGTAAATTCCAGCAAATCCAGGAGGCTTATTCGGGTGGGTTACTCGTTTTCATGTTATCGGATCAGGGGAAGAGAACA +CTGTATGATGTCGGTCTGTATGACCCGGACGATGAAACGAATGACGAGGTGGGGCTTCGCAGATTTCATGCAGGAGATGA +TATCTCTCATGAACGATGTGAAGAAACAGGAGAAGAAATACAGCTTGGAGGAACTACAGGAGATGTTAGTGGAAATGTCA +CAAGGGCTGGAGTTGAAGATGGAGAGTGCTGGTGGTGTGTGGTTGTTAGATGGAGCTGCAGCCTCAAGGAGGAGCTCAAA +GAGGGCCAGATGGGAATCATCAGCGAGTCCGACGACGCTGGACACGACACACCTTCTCTCCCCCACTTGCACGGTTCAGA +GCTGGAATTGTTAGGAAGAACCGGCTGTTGCAAT +>contig_7 
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC +CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT +CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA +AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3722.fna.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,819 @@ +>gnl_Glyma1.01_PACid_16266208 +---------ATGGCTAATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAAGAAAAGC +AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------ +---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG +GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA +------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA +GACATGTTTCAAGCGGATATT------GGATTG--------------------------- +---GATGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTACAGTGAAAGTTCT +AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------ +------------CAGAATTTCTGTTTTGGG---GTGAAC------CAACTCCAAGATATA +AAGAAGGGAAAG---------GGGGGAATTCTAGGAGGAGGAGGTAGAAGTAGACACAGA +AGTGGCAGAAAGCAAAAAATGTTCTAT---GGCCATGATGTT------------------ +------------------------------------------------------------ +>gnl_Glyma1.01_PACid_16266209 +---------ATGGCTAATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAAGAAAAGC +AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------ +---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG +GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA 
+------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA +GACATGTTTCAAGCGGATATT------GGATTG--------------------------- +---GATGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTACAGTGAAAGTTCT +AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------ +------------CAGAATTTCTGTTTTGGG---ACAGGT------GAACCAACTCCAAGA +TATAAAGAAGGG------------------------AAAGGGGGGAATTCTAGGAGGAGG +AGG--------------------------------------------------------- +------------------------------------------------------------ +>gnl_Glyma1.01_PACid_16266210 +---------ATGGCTAATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAAGAAAAGC +AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------ +---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG +GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA +------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA +GACATGTTTCAAGCGGATATT------GGATTG--------------------------- +---GATGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTACAGTGAAAGTTCT +AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------ +------------CAGAATTTCTGTTTTGGG---GTCGGT------CATGTAAACTATCAT +TACCAA------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +>gnl_Glyma1.01_PACid_16301083 
+---------ATGGCCGATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAACAAAAGC +AATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGG------ +---AATTCAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCA------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAGAACGGCATG +GGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATCGAATGAAAACGGA +------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGTTGTTTGAA +GACATGTTTGAAGCAGATATT------GGATTG--------------------------- +---GACGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTGCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTATAGTGAAAGTTCT +AGT---------------------------TCAAATAAACACAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCTGGTTAC------ +------------CAGAATTTCTGTTTTGGG---GTGAAC------CAACTCCAAGATATA +AAGAAGAAAAAG---------GGGGGAATTCTA---GGAGGAGGTAGAAGTAGACACAGA +AATGGCAGAAAGCAAAATATGTCCTAT---GGCCATGATGTTTCATCGAATGACTACCCT +GGAATTTCCACAAAG--------------------------------------------- +>gnl_Glyma1.01_PACid_16301085 +---------ATGGCCGATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAACAAAAGC +AATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGG------ +---AATTCAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCA------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAGAACGGCATG +GGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATCGAATGAAAACGGA +------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGTTGTTTGAA +GACATGTTTGAAGCAGATATT------GGATTG--------------------------- 
+---GACGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTGCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTATAGTGAAAGTTCT +AGT---------------------------TCAAATAAACACAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCTGGTTAC------ +------------CAGAATTTCTGTTTTGGG---GTAGGT------CATGTAAACTATCAT +TACCAA------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +>gnl_Glyma1.01_PACid_16301084 +---------ATGGCCGATGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAACAAAAGC +AATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAAGAAT +GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGG------ +---AATTCAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT +TCA------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG +GGAGTCTACGACAGT------------------------GATGACGACGAGAACGGCATG +GGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATCGAATGAAAACGGA +------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGTTGTTTGAA +GACATGTTTGAAGCAGATATT------GGATTG--------------------------- +---GACGGAGGCCCTTCTCTT--------------------------------------- +------------GCTTCTTCTGATTGCTCAACTTCATCTGCT------------------ +------------------------------------TACATGACTTATAGTGAAAGTTCT +AGT---------------------------TCAAATAAACACAATTCCTCT---GAGATG +AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCTGGTTAC------ +------------CAGAATTTCTGTTTTGGG---ACAGGT------GAACCAACTCCAAGA +TATAAAGAAGAA------------------------AAAGGGGGGAATTCTAGGAGGAGG +------------------------------------------------------------ +------------------------------------------------------------ +>gnl_Medtr3.5_Medtr8g022310.1 +---------ATGGCTAACGAA--------------------------------------- +------------------------------------------------------------ 
+------------------------------------------------GGAAACAAAAGC +AATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGG------ +---AATGTGAAGTTTGTGGAAGAAGCTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTAT +TCT------------------GTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTT +GGAGTTTACGACAGT------------------------GATGATGACGAAAATGGTATG +GGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATCAAATGAAAATGGA +------GAGGAGAGCTTCGAGGAGTTACAA------------------CAGTTGTTTGAT +GATATGTTTCAAGCGGATATC------GGATTA--------------------------- +---AATGGAAGCACCTCTCTT--------------------------------------- +------------AATGCTTCGGGTTGCTCCACTTCATCGACT------------------ +------------------------------------TTCATGACGTTCAGTGAAAGCTCG +AAT---------------------------TCAAATAAGCGCAATTCCACT---CAAATG +AATTTTGGGAAG---------GCAGAGGATTCTTCTAGTTTTGGTGCAAATTAC------ +------------CAGAACTTCTGTTTTGGG---ATGAAG------CACCTTCAAGAAGAT +GTGGAGAAGGAA------AAAGGGGGAATTCTAGAAGGAGGAGGTAGCAAAAAACAAAGA +AAAGGAAGAAAACAAAAAATTTCATGT---GGACAT---GTTTCCTCTAATGACCATCCT +GGTATTTCTGCTAAT--------------------------------------------- +>gnl_Medtr3.5_Medtr8g022310.2 +---------ATGGCTAACGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAACAAAAGC +AATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGG------ +---AATGTGAAGTTTGTGGAAGAAGCTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTAT +TCT------------------GTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTT +GGAGTTTACGACAGT------------------------GATGATGACGAAAATGGTATG +GGAGACTTTCTGAATGAAATGGTTACAATGATGAGCCAAACTAAATCAAATGAAAATGGA +------GAGGAGAGCTTCGAGGAGTTACAA------------------CAGTTGTTTGAT +GATATGTTTCAAGCGGATATC------GGATTA--------------------------- +---AATGGAAGCACCTCTCTT--------------------------------------- +------------AATGCTTCGGGTTGCTCCACTTCATCGACT------------------ 
+------------------------------------TTCATGACGTTCAGTGAAAGCTCG +AAT---------------------------TCAAATAAGCGCAATTCCACT---CAAATG +AATTTTGGGAAG---------GCAGAGGATTCTTCTAGTTTTGGTGCAAATTAC------ +------------CAGAACTTCTGTTTTGGG---GTCAAT------CTTGTAAATTATCAT +TACCAA------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +>gnl_Musac1.0_GSMUA_Achr6T31040_001 +---------ATGGCGGCCGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAGGACAAAAGC +GGCGACTTCTACGCCGTGTTGGGGCTCAGGAAGGAGTGCTCCGAAACCGAGCTGAGGAAT +GCGTACAAGAAGCTTGCCATGAGGTGGCATCCGGATAAGTGCTTGGCTTCGGGA------ +---AATGCTCAAATCGTGGGGGAAGCCAAGGAGAAGTTTCAGGAGATCCAGAAAGCCTAC +TCT------------------GTTCTCTCAGACTCCAATAAGAGATTCCTGTACGATGTG +GGAGTCTACGACAACGAT------------------GATGACAATGACGAAAACGGTATG +GGAGACTTTATAGGGGAGATGTTGGAGATGATGAGCCAAACGAAACCCAATGAGAACAGC +------CAAGATAGCTTCCAGGAGCTGCAG------------------GAGCTGTTTGTG +GAGATGTTCCAGGACGACCTG------GATGCCGGA------------------------ +---TTTGGTGGTTCCATCTTCCAC---GATTGCCCCTGGGCTCAACCG------------ +---------ACCAATGGCCAAGATTGCTGGACTTCATCGGGA------------------ +------------------------------------CTGCACTTTGCTAATGGAAGGAGT +AAG------------------------TGTGGCAACAAGCGGGGCAACTCA---GCTGTG +AACTTGGGAAAG------------GTCAATCTTGAAGAGTTGGAACATGGTACC------ +------------AGCGACTTCTATTTTGGG---CTAAAT------GATGCAGCACAGCCA +TCACAAGGGAAAGGAGGCAGT------------------------AATAACAAGAGAAGG +AATGGAAGAAAGCAAAAGGTTTCATCC---AATCATGATGTCTCATCC------------ +------------------------------------------------------------ +>gnl_Musac1.0_GSMUA_Achr9T18140_001 +---------ATGGCCGCCGGG--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAGGAAAAGATC +GGCGATTTCTACACGGTGCTGGGGCTCAGGAAGGAGTGCTCGGAGGCGGAGCTGAGGATC 
+GCGTACAAGAAGCTGGCTATGAGATGGCATCCGGATAAGTGCTCGGCTTCGGGA------ +---AACCATCGAAGGATGGAGGAAGCGAAGGAGAAGTTCCAGGAAATCCAAAAAGCCTAC +TCT------------------GTTCTCTCGGACTCCAGCAAGAGATTTCTGTATGATGTG +GGAATCTACGATAACGAG------------------GATGATAATGACGAAAAAGGAATG +GGGGATTTTATTGGGGAGATAGCTCAGATGATGAGCCAAACGAAATCTGGGGAGAATGGT +------CATGATAGCTTTGAGGAGCTGCAG------------------CGGATGTTCCTG +GATATGTTCCAGGACGACCTG------GACGCCGGA------------------------ +---TTCGGTGATTCTTCCATCCAC---AGTGGCCCCCAAGCTCGGCCA------------ +---------ACCGACGGTCTCAATTGCTCGATGCCATCAGGA------------------ +------------------------------------CTGCAGTTTGCTGATGGAGGGAAC +AAT------------------------GGCAGCAACAAGAGAGGCAACTCG---GAGAAG +------------------------GCAAAGCTGGATGGGTTGGAAAACAGTTCC------ +------------ACTGGCTTCTGCTTCGGG---TTGAAT------GATGCAGGGCAGTCA +TCAAAAGGAAAAGGAAGCGCT------------------------AATAGCAAGAGAAGG +AATGGAAGAAAGCAGAAGGTCTCATCC---AAACATGATGTCTCATCCAGTGAT---GCT +GAGGTCTCATTT------------------------------------------------ +>gnl_Musac1.0_GSMUA_Achr8T23700_001 +---------ATGGCGAGCGAC--------------------------------------- +------------------------------------------------------------ +------------------------------------------------ATGGATGCAAGC +GGCGATTTCTACTCGGTGCTGGGGCTGAAGAAGGAGTGCTCCGAGGCGGAGCTCAGGAAT +GCGTACAAGAAGCTCGCTTTGAAGTGGCATCCCGATAAGTGCTCGGCGTCGGGT------ +---AATGAGATTCGCATGAAGGAAGCGAAGCAGCAGTTCCAGGAGATCCAGAAAGCCTAC +TCT------------------GTTCTCTCCGACTCCAACAAGAGATTTCTGTACGATGTT +GGAGCCTACGACAAAGAC------------------GACGACAAAGACGAAGAGGGGATG +GTGGAGTTTCTTGGGGAGATGGCGCAAATGATGAGGCAAACCAAATGCTGTGGGAGCGGC +------CAGGAGAGCTTCGAGCAGCTGCAG------------------CAGATGTTCGTG +GAGATGTTCCACGACGATCTG------GACGCGGGA------------------------ +---TTC------------------------------------------------------ +------------------------TGCGGCCACTCCTCGGCC------------------ +---------------------------------------------ACCTCGGGCGCGGCG +TCC------------------------TGCGGCAACAAACGGGACAACTCG---GCGATG 
+GACTCGGGCAAG------------CGGAAGCCGGACGAGTTGGACCCGGCCGCC------ +------------ATTGGGTTCTGCCTCGGG---ACAAAG------GATGCAGGGCAATCC +TCAAAAGGAAGAGGTAGC---------------------------AACAGCAAGAGAAGG +AACAGAAGAAAGCAAAAGGCATCATCC---AAGCATGACAACTCATCTCACAAT---GCT +AAGGTCTCAGCT------------------------------------------------ +>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001 +---------ATGGAGGGGGAC--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAGGAGAAGAGC +GGGGATTTCTACGCGGTGCTGGGGCTGAAGAAAGAGGGCTCCATGGCGGAGCTCAAGAAT +GCGTACAAGAAGCTGGCGATGAAGTGGCATCCCGATAAGTGTCCTGCGTCAGGC------ +---AATAAGATACGCATGGATAAAGCGAAGGAGAAGTTCCAGGAGATCCAAAAAGCCTAC +TCT------------------GTTCTCTCCGACTCCAACAAGCGATTCCTGTACGATGTC +GGAGTTTACGACAAAGAC------------------GATGAGGAAGATGAAGAGGGGATG +GGGGACTTCATTGGGGAGATCGCGCAAATGATGAGCCAGTCCAAACCCAGCGGGAGCGGC +------CACGAGAGCTTGGAGGAGCTGCAT------------------CGGCAGGTCGTG +GAGATGTTCCTCGACGAACTG------GACGCCGGAGAT--------------------- +CGCTTCTCCTCGGCCAACCAA--------------------------------------- +------------GGCGCGTCGTCCTGCGACGGCAGGGACGAC------------------ +------------------------------------------------------------ +------------------------GGCGGCGGTAACAAGCGTGGCAACTGG---GCGGTG +GACTGGGGCAAG------------GAGAAGCTGAACGAGTTGGGCCCGGGCACC------ +------------GGCGGGTTCTGCTTCGGG---GTGAGT--------------------- +---------------------------------------------------CGCCGAGTC +CACTCCTTTGATCTTATGATAGACGTA---GTCCACCTCATCCATTCTGATCTG------ +---------------------------------------------------ACTCTGGAA +>gnl_Orysa6.0_PACid_16843526 +---------ATGGCCGACGGGGGA------------------------------------ +------------------------------------------------------------ +---------------GAGAAGTGC---CGGGACGCGGCCGGC---GAGGGCGGCGGCGGC +GGCGACCTGTACGCCGTGCTCGGGCTCAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTC +GCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCATCCTCCAGC------ +---AGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTAT 
+TCC------------------GTCCTCTCAGACTCAAACAAGCGGTTCCTCTACGACGTG +GGGGTATATGATGATGACGAC------------AATGACGATGACAACCTGCAGGGGATG +GGGGACTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGG------ +------CAGGAGAGCTTTAAAGAACTGCAG------------------CAGCTATTCGTA +GACATGTTCCAAGCTGATCTT------GATTCGGGT------------------------ +---TTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGCCAGACTCGAACA +TCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCA------------------ +------------------------------------GTAGCTACTGAGGCAGAATCGCCA +TCA------------------TGTAATGGTATTAATAAGCGTGGTTCATCA---GCAATG +GACTCTGGGAAG---CCTCCAAGAGCCAGCGAAGTCGGTTCTGGACAGAGTCAA------ +------------TCAGGGTTTTGTTTCGGG---AAGAGT------GATGCTAAACAAGCG +GCGAAGACGCGAAGCGGGAACACG---------------------GCCAGCCGGAGGAGG +AACGGCCGGAAGCAGAAGGTGTCGTCG---AAGCACGACGTCTCGTCTGAGGACGAGATG +CCAGGTTCGCAGTGG---------------------------------CACGGCGTGGCC +>gnl_Orysa6.0_PACid_16843528 +---------ATGGCCGACGGGGGA------------------------------------ +------------------------------------------------------------ +---------------GAGAAGTGC---CGGGACGCGGCCGGC---GAGGGCGGCGGCGGC +GGCGACCTGTACGCCGTGCTCGGGCTCAAGAAGGAGTGCTCCGACGCCGACCTCAAGCTC +GCGTACCGGAAGCTCGCCATGAGATGGCATCCGGACAAATGCTCATCCTCCAGC------ +---AGTGCAAAGCACATGGAGGAAGCCAAGGAGAAGTTCCAGGAGATCCAGGGCGCCTAT +TCC------------------GTCCTCTCAGACTCAAACAAGCGGTTCCTCTACGACGTG +GGGGTATATGATGATGACGAC------------AATGACGATGACAACCTGCAGGGGATG +GGGGACTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGCACGGCCAACGAGG------ +------CAGGAGAGCTTTAAAGAACTGCAG------------------CAGCTATTCGTA +GACATGTTCCAAGCTGATCTT------GATTCGGGT------------------------ +---TTCTGCAATGGACCCTCAAAGTGCTACCATACCCAGGCCCAAAGCCAGACTCGAACA +TCCTCAACCTCCCCTTCGATGTCACCGTCTCCACCGCCTCCA------------------ +------------------------------------GTAGCTACTGAGGCAGAATCGCCA +TCA------------------TGTAATGGTATTAATAAGCGTGGTTCATCA---GCAATG +GACTCTGGGAAG---CCTCCAAGAGCCAGCGAAGTCGGTTCTGGACAGAGTCAA------ +------------TCAGGGTTTTGTTTCGGGCAGAAGAGT------GATGCTAAACAAGCG 
+GCGAAGACGCGAAGCGGGAACACG---------------------GCCAGCCGGAGGAGG +AACGGCCGGAAGCAGAAGGTGTCGTCG---AAGCACGACGTCTCGTCTGAGGACGAGATG +CCAGGTTCGCAGTGG---------------------------------CACGGCGTGGCC +>gnl_Poptr2.2_PACid_18217800 +---------ATGGCAAACGGAGGAGAA--------------------------------- +------------------------------------------------------------ +---------------------------------------------GATAAATGGAAAAGC +AATGACTTATATCAAGTCTTGGGGTTGAATAAGGAATGCACTGATACAGAGCTCAGGAGT +GCTTATAAGAAACTTGCACTGAGATGGCATCCAGATCGATGTTCAGCTTCAGGA------ +---AATTCTAAGTTCGTTGAAGAAGCCAAAAAGAAGTTTCAGGCAATTCAACAGGCCTAT +TCT------------------GTTCTTTCTGACACCAACAAGAGGTTTCTTTACGACGTT +GGTGTTGATGACAGT------------------------GATGACGACGAAAATGGAATG +GGTGATTTTCTGAATGAAATGGCTGTCATGATGAGCCAAACGAAGCCTAGCGAAAACATG +------GAGGAGAGCCTAGAGGAACTGCAA------------------GAATTATTTGAC +GAGATGTTCCAAGAGGATCTG------CATTCG--------------------------- +---TTTGGGATTGACAGTCAG--------------------------------------- +------------GCTGCTCCTTCATGT------CCTCCTTCT------------------ +------------------------------------TATGTATCCTACAGCGAAAGTTCC +AAC------------------------TCAAATAACAAACGTGTTTCTGCT---GATATG +AACTTGGGGAAG---ACTAAAGTGGATGATTCTTCTAGCTTCAACTCTCACTTT------ +------------GAGAAATTCTGTTTAGGG---ACA------------------------ +---------GGTGGAACAGCAGCAACCTTTCAAGAAGGTGAAGGTGGGAGTAAGAGGAGG +AATTCAAGGAGGAGCCAGCGGCAGACG---AAGGCAAGACAAGAAACAAAGAGTTTCTTC +GGGCTA------------------------------------------------------ +>gnl_Poptr2.2_PACid_18234651 +---------ATGGAAAATGGAGGAGAA--------------------------------- +------------------------------------------------------------ +---------------------------------------------GAGAAAGGGAAAAGC +AATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCACAGAGCTGAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAA------ +---AATTCCAGGTTCGTTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTAT +TCT------------------GTTCTTTCTGACACCAACAAGAGGTTTCTTTACGACGTT +GGTGTTTATGACAGT------------------------GAAGACGACGAAAATGGAATG 
+GGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCAAACAAAGCCCCACGAAAACGTG +------GAGGAGAGCTTTGAGGAATTGCAA------------------GGATTGTTTGAG +GAGATGTTCCAAGAGGATTTG------GATTCG--------------------------- +---TTTGGGATTGCCTGTCAG--------------------------------------- +------------GCTACTACC--------------------------------------- +------------------------------------TGTGTGTCATACAGCGAAAGCTCC +AAC------------------------TCAAATGATAAACGTGTTTCTGTC---GATATG +AACTTGAAGAAG---ACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTC------ +------------GAGAAGTTCTGTTTAGGG---GTA------------------------ +---------AGTGGAACACCAGCAATCTTTCAAGAAGGGGAA---GGGAGTAAGAGGAGG +AGTTCAAGGAGGAACCGGCGG--------------------------------------- +------------------------------------------------------------ +>gnl_Poptr2.2_PACid_18234649 +ATGCTAAGAATGGAAAATGGAGGAGAA--------------------------------- +------------------------------------------------------------ +---------------------------------------------GAGAAAGGGAAAAGC +AATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCACAGAGCTGAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAA------ +---AATTCCAGGTTCGTTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTAT +TCT------------------GTTCTTTCTGACACCAACAAGAGGTTTCTTTACGACGTT +GGTGTTTATGACAGT------------------------GAAGACGACGAAAATGGAATG +GGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCAAACAAAGCCCCACGAAAACGTG +------GAGGAGAGCTTTGAGGAATTGCAA------------------GGATTGTTTGAG +GAGATGTTCCAAGAGGATTTG------GATTCG--------------------------- +---TTTGGGATTGCCTGTCAG--------------------------------------- +------------GCTACTACC--------------------------------------- +------------------------------------TGTGTGTCATACAGCGAAAGCTCC +AAC------------------------TCAAATGATAAACGTGTTTCTGTC---GATATG +AACTTGAAGAAG---ACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTC------ +------------GAGAAGTTCTGTTTAGGG---GTGGAA------CACCAGCAATCTTTC +AAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACC---GGCGGTAGAGAGAGG +AAAGGCAGGAAACAAGAAGTTTCATCT---GGCTATGATGTCTCCTCCCATGAC---CAT 
+GGTATTTCTGCTTCA--------------------------------------------- +>gnl_Poptr2.2_PACid_18234650 +---------ATGGAAAATGGAGGAGAA--------------------------------- +------------------------------------------------------------ +---------------------------------------------GAGAAAGGGAAAAGC +AATGACTTTTATCAGGTTTTGGGGTTGAATAAGGATTGCACTGCCACAGAGCTGAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGATGTTCAGCTTCGGAA------ +---AATTCCAGGTTCGTTGACGAAGCCAAAAAGAAGTTTCAGACAATTCAACAGGCCTAT +TCT------------------GTTCTTTCTGACACCAACAAGAGGTTTCTTTACGACGTT +GGTGTTTATGACAGT------------------------GAAGACGACGAAAATGGAATG +GGCGGATTTATGAATGAAATGGCTGCTATGATGAGCCAAACAAAGCCCCACGAAAACGTG +------GAGGAGAGCTTTGAGGAATTGCAA------------------GGATTGTTTGAG +GAGATGTTCCAAGAGGATTTG------GATTCG--------------------------- +---TTTGGGATTGCCTGTCAG--------------------------------------- +------------GCTACTACC--------------------------------------- +------------------------------------TGTGTGTCATACAGCGAAAGCTCC +AAC------------------------TCAAATGATAAACGTGTTTCTGTC---GATATG +AACTTGAAGAAG---ACAAAGGTGGATGATTCTTCTGGCTTCAATTCTCACGTC------ +------------GAGAAGTTCTGTTTAGGG---GTGGAA------CACCAGCAATCTTTC +AAGAAGGGGAAGGGAGTAAGAGGAGGAGTTCAAGGAGGAACC---GGCGGTAGAGAGAGG +AAAGGCAGGAAACAAGAAGTTTCATCT---GGCTATGATGTCTCCTCCCATGAC---CAT +GGTATTTCTGCTTCA--------------------------------------------- +>gnl_Soltu3.4_PGSC0003DMP400016105 +---------ATG------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------------GGC +AATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAAT +GCTTATAAGAAGCTTGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGG------ +---AATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGGCAATTCAAGAAGCATAT +TCT------------------GTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTA +GGAGTTTATGACTCTGGT------------------GATGATGACGACGAAAATGGCATG +GGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCAG +------GAAGAAACCTTTGAGGAATTGCAG------------------GATATGTTTGAC 
+GAAATTTTCAATAGTGATAAT------GGGATG--------------------------- +---------TCTTCTTCTTCT--------------------------------------- +------------TCTTCTTCTTCTCGGACTGGAACTCCTTCA------------------ +------------------------------------ATGTGTTCTACTACATCGTCTACA +TCTTCCAGTGAGACC---TTTTTTACCTTTTCCAACAAAAGAAGTTCAGGT---GAAATG +AAGTCGGGTAAA------------------------GGCGATTCTTGCCAATTC------ +------------CAAGGATTTTGTGAAGGG---ACA------------------------ +---------GGTGGAGCATCTGGAAAAAGCAATGAAAGAGAA---CGGAGTCGGAGGAAA +AATTCCAAGAGTGGACGGAAGCAA------------------------------------ +------------------------------------------------------------ +>gnl_Soltu3.4_PGSC0003DMP400016106 +---------ATG------------------------------------------------ +------------------------------------------------------------ +---------------------------------------------------------GGC +AATGATTATTATGCAGTTTTGGGATTGAAAAAGGAATGCACTGAAACAGAGCTTAGGAAT +GCTTATAAGAAGCTTGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGG------ +---AATTCGAAGTTTGTAGATGAAGCAAAGAAGAAATTTCAGGCAATTCAAGAAGCATAT +TCT------------------GTGTTATCGGATGCAAACAAAAGGTTTCTGTACGATGTA +GGAGTTTATGACTCTGGT------------------GATGATGACGACGAAAATGGCATG +GGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCAG +------GAAGAAACCTTTGAGGAATTGCAG------------------GATATGTTTGAC +GAAATTTTCAATAGTGATAAT------GGGATG--------------------------- +---------TCTTCTTCTTCT--------------------------------------- +------------TCTTCTTCTTCTCGGACTGGAACTCCTTCA------------------ +------------------------------------ATGTGTTCTACTACATCGTCTACA +TCTTCCAGTGAGACC---TTTTTTACCTTTTCCAACAAAAGAAGTTCAGGT---GAAATG +AAGTCGGGTAAA------------------------GGCGATTCTTGCCAATTC------ +------------CAAGGATTTTGTGAAGGG---GTGGAG------CATCTGGAAAAAGCA +ATGAAAGAGAACGGAGTCGGAGGAAAAATTCCAAGAGTGGAC---GGAAGCAATAGGATG +GATGCTAAAAGGCAAAAGGTTCTATCA--------------------------------- +------------------------------------------------------------ +>gnl_Ambtr1.0.27_AmTr_v1.0_scaffold00007.329 
+---------ATGGCACCCCGA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAGAGAAAGAC +AGTGATTTTTATGCAATTTTAGGGTTGAAGAAGGAGTGCTCTGCTTCAGATCTCAGAAAT +GCGTACAAAAGGCTCGCACTTCGGTGGCATCCAGATAGGTGCTCTGCCTCAGGG------ +---AACACAAAGTTTGTGGAGGAATGCAAGAAAAAGTTCCAGGCCATTCAGCAGGCTTAT +TCC------------------GTGCTCTCGGATGCAAATAAGAGGTTTTTGTACGATGTT +GGAGCATATGGAAGT------------------------GACGATGACGATCAGGGAATG +GGTGAATTTCTTGGGGAGATGGCGGTAATGATGAGCCAGACAAAGCCCAGTGAAAAAGGG +------CCGGAGAGCTTTGAGGATCTACAG------------------AACTTGTTTCAG +GAGATGTTCGAAAGGGATCTG------GACATG--------------------------- +---TTTAAGTCATCGACCTCCCAC------------------------------------ +------------AACAACAACAATGATAACAACAATAATAAT------------------ +------------------------------------CATAGGAGTAGTGACAATAATAAT +TGTAGTAGTGTTCAT---TGTTTTAGTAACACCAACAAGAGGAATTGCTCG---GATATG +AATGCCGGAGAA---GCCTCGGAGGTCGGGCGCTTTGCTTTCTCATGCTATGCGACAGAG +TTCCTGCACAAGCAAACTTTCTCCGTCGGG---GCGGAT------------------GAT +GTGCGATCGGAGTCG------------------------------AGCAACAAGAGACGA +AACGGGAGGAAACAGAAATCCACCTCC---TCTTCAAGGAAAAGT--------------- +------------------------------------------------------------ +>gnl_Aquco1.0_PACid_18141277 +---------ATGGCTAGT------------------------------------------ +------------------------------------------------------------ +------------------------------------------------GAAGAAGAAGCA +AGTGATTTCTATAAAGTTTTGGGATTGAAAAATAACTGTTCTTCCTTGGAGCTCAGGAAC +GCTTATAAGAAGCTTGCACTGAAATGGCATCCGGATCGTTGTGCTGCTTCAGGA------ +---AACTCAAAGTTTGTTGAAGAAGCAAAGAAGAATTTTCAAGCAATACAAGAAGCTTAT +TCT------------------GTTCTTTCAGATGAGCAGAAACGATTTATGTATGACGTT +GGTGTCTACGATAAAGAT------------------GATGATGACGAAAATGAAGATATG +GGCGATTTTTTAGGTGAAATGATGTCTATGATGAAGCAAGAAAATACTAGTGCGGATGGA +------CAACAAAGTTTTGAAGACCTTCAA------------------AACCTATTCCAG +GAAATGGTTCAAAATGACAAA------GAATTT--------------------------- 
+---TATAATCCAGCCTCTCAA--------------------------------------- +------------AACTCGTCGATATATAATGCTAGTAACAACATGTTCTCCTTCTCTAAT +AACGAGAACTTAAACAATGCTAGCAACAACACATTCTCCTCTTTTTATAATGAGAACCTA +AAC------------------------AGCAGCAACAAGAAAAGTTGCTCA---AGTATG +AGCGCAGAAAAT------ACCAAGGTGGACTTTAACATGGAATCATTGGACTTC------ +------------CGCAGCTTTTCTATTGGG---TTAGAA------GGCGGTACATCATTT +CAAAACTCCAAAGGGAGAGGA------------------------GTAACGGGTAGGCGG +ACAGGAAGGAAACAGAAGGGGTCGTCC---TGTAATGATATGTCTTCCCATGAT---TCT +AAGATTTTGGCG------------------------------------------------ +>gnl_Arath10_AT3G14200.1 +---------ATGGCGTCCAGTAAT------------------------------------ +------------------------------------------------------------ +---------------------------------------------AGCGAGAAGATCAAC +GAGAATCTGTACGCTGTTCTGGGTTTGAAGAAGGAATGTTCTAAGACGGAGCTCCGTTCT +GCTTATAAGAAGCTTGCTCTCAGATGGCATCCAGATCGTTGTTCGTCA------------ +------ATGGAGTTTGTAGAAGAAGCAAAGAAGAAATTTCAGGCAATCCAAGAAGCCTAC +TCT------------------GTTCTGTCTGACTCCAACAAGAGGTTCCTGTATGATGTT +GGTGCTTATAATACTGAT------------------GATGATGATGACCAAAACGGAATG +GGAGATTTCTTGAACGAAATGGCGACTATGATGAATCAATCCAAGCCTAGTGATAATAAC +---ACAGGGGACAGTTTTGAACAACTACAA------------------GATCTGTTTAAT +GAGATGTTTCAAGGAGACGCT------GCAGCA--------------------------- +---TTCCCATCATCA--------------------------------------------- +------------------TCGTCCTGCTCCACTTCAAATTTC------------------ +------------------------------------ACTTCATCTCGTAGTTTTGTATTC +GAT------------------------ACAAATTCTCAGCGGTCATCTTCGTTTGCGACA +AGTTCGATGGGG------ATGAATAATGATCCTTTCGGATATGACCCGAGAGCT------ +------------CATTCCTTCTCTTTAGGG---GTGGAC------CATCAGCAAGAGTTC +AAGAAAGGGAAAAAC------------------------------AATGGCGGAAGAAGA +AACAGGAGAAAGAACAATGTTCCATCGGCTGGTCACGAAACGTCGTCGTCAAACAACTAT +GGAGTCCCCACCTCA--------------------------------------------- +>gnl_Bradi1.2_Bradi3g60090.1 +---------ATGGCCACCGGGGGC------------------------------------ +------------------------------------------------------------ 
+---------------GACAAGTGCGGCGGAAAGCCGGCGGCC---GCGGGGGTGGGCGGC +GGTGACCTGTACTCTGTGCTGGGCGTCAACAAGGAGTGCTCCGACGCCGACCTCAAGGTC +GCCTACCGGAAGCTCGCCATGAGATGGCATCCGGATAGATGCTCCTCCTCCAGC------ +---AGCACCAAGCACATGGAGGAAGCAAAAGAGAAGTTCCAGGAGATCCAGGGCGCCTAT +TCC------------------GTCCTCTCCGATGCCAACAAGCGCTTCCTCTATGACGTG +GGGGTATATGAAGAACATGAAGAA---------GAAGATGATGACACTCTGCAGGGGATG +GGGGACTTCCTTGGTGAGATGGCCCATATGATGAGCCAGACGCAGCCAGCGAGA------ +------CAGGAAAGCTTTGAGGAGCTCCAG------------------CAGCTCTTCGTG +GACATGTTCCAGTCTGATATT------GAATCGGGA------------------------ +---TTCTGCAACGGACCTGCCAAG---GACCATGACCCAGTCCAAAGACAGACGCGAACA +TTCTCGACCCCTCCTTCGCCATCGCCATCTCCACCGCCTCCA------------------ +------------------------------------CTAGCTACAGTGGACGAAGCGGCA +TCA------------------TGTAATGGCATCAATAAGCGTGGCTCATCA---GCAATG +GGCTCTGGGAAG---CCTCCAAGAGCTGGTGAAGTGAGTGGGGGTCACGGCCAG------ +------------TCTGAGTTCTGTTTCGGG---ATGAGC------GACGCCAAGCAAGCG +CCGAAGGCGCGAGGCGGGAAC------------------------GCTAGCAGGAGAAGG +AACGGCCAGAAGCAGAAACTGTCGTCG---AAGCACGACGTCTCCTCCGGCGATGAGATG +CCGAGACCA---------------------------------------CATGCAGCAGTA +>gnl_Carpa1.181_PACid_16420351 +---------ATGGCGGATGGA--------------------------------------- +------------------------------------------------------------ +---------------------------------------------GAAGACAAGAACAAC +AGTGATTTGTATGCGGTTCTTGGATTGAATAAGGAATGTACTCCAGCAGAGCTCAGGAAC +GCTTATAAGAAACTTGCAATGAGATGGCATCCAGATCGCTGTTCCGCGTCGGGG------ +---AATTCAATGTTTGTGGAAGAAGCAAAGAAGAAATTTCAGGCAATCCAAGAAGCCTAC +TCT------------------GTTCTTTCTGACGCAAACAAGAGGTTTCTGTACGACGTC +GGAGCTTACGAAAGT------------------------GATGACGACGAAAATGGAATG +GGTGATTTTTTAAACGAAATGGCAGCCATGATGAGCCAAACAAAGCCTAATGAGAATGGG +AATGCACAAGAGAGCTTTGAAGAATTGCAA------------------GAGTTGTTTCAA +GAGATGTTTCAAGGGGATATGGGATTCAACACA--------------------------- +---TTTGGATCTAGTTCTCAGCCT------------------------------------ +------------ACTACTTCTTCGTGTTCTGCTTCCTCTGCA------------------ 
+------------------------------------TATGCAACCTGTAGCGAAACCTCC +AAT---------------------CCTAACAACAACAAGCGCAATTCATCA---GAAATG +AATTATGGCAAG---AAAAAGGTAGATGATTCTTCAGGGTTTCATGCTCATTTC------ +------------CAAACCTTTTGTTTAGGG---GTGGAA------CAGCAGCAAGATTTC +AAGAAGGGGAAGGAAGCAAGAGGAGGAATTCGAGGAAAACCC---GGAGGTAGTAGGAGG +CAGGGAAGGAAACAGAAGGTTTCATCT---CGCCACAATGTCTCATCCAATGAC---TTG +GGCATTTCTGCTTCC--------------------------------------------- +>gnl_Frave2.0_gene05408 +---------ATGGCGGGGGGAAAGTGGGTCCCCCCACCCCTGTCCCAGTTTCATCTTCAC +ATAAAGAGGTTCCGTCGACGTCAGAAAGTCTCTGGATCCAGTGGAAACACAGAACCTTCT +GGATACACAGACTGCAAAAATATCTCCAATCGA---------AGAATGGAAGAGAAAGGC +AATGACTTTTATGCTGTTATGGGGTTGAAGAAGGAATGCTCTGACTCGGAGCTCAGGAAT +GCTTATAAGAAACTTGCACTGATATGGCACCCAGATCGTTGCTCTGCCTCAGGA------ +---AATTCAAAGTTCGTGGAAGAAGCCAAGAAGAAGTTTCAGGACATTCAACAAGCCTAT +TCT------------------GTTCTGTCCGACGCCAACAAGAGGTTTCTGTACGATGTA +GGAGCTTATGAAAGT------------------------GATGATGACGAAAATGGAATG +GGTGATTTTTTAAACGAGATGGCGGTGATGATGAGCCAGACTAAGCCGAATGAAAATGGA +------GGAGAGAGCTTCGAACAATTGCAG------------------GAGCTCTTTGAA +GAAATGTTTCAGGGGGATATT------GAGGGC--------------------------- +---TTTAGCTCCTGCTCTCAGCCT------------------------------------ +------------CCTACTTCCTGTTCTACTTCCTCATCTTCA------------------ +------------------------------------TACGCATTGTACTGTGAAAATTCT +ACT------------------------CCCAGTAACAAACGTAATTCCTCC---GCAATG +AATTATGGCAAC------GCAACCCTGGACAGTTCTGGTTTTGATGCTCATTTT------ +------------CACAATTTCTGTGTAGGG---ACA------------------------ +---------GGCGGGAAGCCAGCAAAGGATCGGGAAGGGGAT---GCCAGGAAGAGAAAG +GATTCCAGGAGGAGTAACCGG--------------------------------------- +------------------------------------------------------------ +>gnl_Mimgu1.0_PACid_17694730 +---------ATGGCTGCTGAT--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAAGAGAAAAGC +AGCGATTTTTACGGCGTTCTGGGGCTGAGGAAAGAATGTACGGCGGCGGAGCTCAGGGTT 
+GCCTACAAGAAACTTGCAATGAAATGGCATCCAGATCGTTGCTCTGCTTCTGGG------ +---AATTTAAAGTATGTGGAGGAAGCAAAGAACAAGTTTCAAGCTGTCCAACAGGCCTAT +TCT------------------GTGCTTTCCGATGCCAACAAAAGGTTTCTCTACGACGTA +GGAATCTACGATTCTGAA------------------GACGATGCTGACGAAAACGGTATG +GGTGATTTCTTGAATGAAATGGTAGCAATGATGGGCCAAAGTAAACCAAATGAAAATAAA +------AACGAGAGCTTCCAAGAATTGCAA------------------GATCTATTCGAG +GAAATATTCAACAATGACGCG------GAAGAGGTT------------------------ +---TTCAAGATTCCTCCTCCGCAC------------------------------------ +------TTTCCGTACCAAGATTCTTGCAGCGAGACCCGCACC------------------ +------------------------------------------------------------ +---------------------------GCATCGAACAAGAGGAACGCCCGC---GAAATG +GGCTCCGTAAAT---------------TTCAGTAATATCGAAGCCACACCATTT------ +------------GAAGGGTTCTGCATAGGG---GAAAATGTAATTTTTGGGGGAGAGAGA +ATACAAACGAGGCCCGGA---------------------------GGAGGTAGTAGGAGG +ACG------AAGCCGAAGATTTCGACA---TCGATCGAT--------------------- +------------------------------------------------GGTTTAATTAGT +>gnl_Nelnu1.0_NNU_010544-RA +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---ATGAACCTGTTGTTGCAGAAATGGCATCCGGATCGATGCTCCTCGTCGGGA------ +---AACTCTAAGTTCGTGGAAGATTCAAAGAAGAAATTTCAGGCAATTCAAGAGGCTTAT +TCT------------------GTTCTATCCGACGAGAATAAGCGATTTCTTTACGACGTT +GGAGTTTACGACTGCGACGAC---------------GATGACGATGACGAAAACGGAATG +GGAGAATTTTTGGGGGAAATGGCGACTATGATGAGCCAAATTAAACCCAGCGAGAACGGG +------CCGGAGAGTTTGGAGAAGCTGCAG------------------GAACTGTTCGAG +GAAATGTTCCAAAGGGACATG------GATGATGGT------------------------ +---TTCTTCTCCCCCTCCCCCCAATGCGCT------------------------------ +---------TCTTTTTCTTCGTCTTGCTCATCTTCTTCGTCG------------------ +---------------------------TCGACGACTTATTTTTCATATAATAACAACAAG +CAC---------------------------GACAATAAAAGGAATTGCTCC---GACATC 
+AGTTCTATGGAC------GATTTCTACACATTTGGCACGGATTCTATACAATTC------ +------------AGCAATTTCTGCATTGGG---GTGGAA------GGAGGAGAA---GAT +TCAAAAGTAAGAGGAGGAAAG---------------------------TCAAGGCGGAAG +AGCAACAGGAGACAAAAAGTTTCATCGTCTAAACACGATCCGTCGTGCCGT--------- +------------------------------------------------------------ +>gnl_Solly2.3_Solyc03g123560.2.1 +---------ATG------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------GAAGACAAAAGC +AATGATTATTATGCAGTTTTGGGGTTGAAGAAGGAATGCACTGACACAGAACTTAGGAAT +GCCTATAAGAAGCTTGCACTGAAATGGCACCCAGATCGCTGTTCAGCATCGGGG------ +---AATTTGAAGTTTGTAGATGAAGCAAAGAAGCAATTTCAGGCAATTCAAGAAGCATAT +TCT------------------GTGTTATCGGATGCAAACAAAAAGTTTTTGTACGATGTA +GGAGTTTATGACTCTGGT------------------GATGATGACGACGAAAATGGCATG +GGTGATTTCCTGAATGAAATGGCAGCTATGATGAGCCAAAATAAGTCCAATGAAAATCAG +---GGAGAAGAAACCTTTGAGGAATTGCAG------------------GATATGTTTAAT +GAAATGTTCAACAGTGATAAT------GGAACG--------------------------- +---TTTTCTTCTTCTTCTTCT--------------------------------------- +------------TCTTCTTCTTCTTGGACTGGAACTCCTTCA------------------ +------------------------------------ATGTGCTCTACTACATCATCTACA +TCTTCAAGTGAGACT---TTTTTAACCTTTCCCAACAAGAGAAGTTCAGGT---GAAATG +AAGTCGGGTAGT---------------AGTGTAAGAGGCGATTCTTGCCAATTC------ +------------CAAGGATTTTGTGTAGGG---GCA------------------------ +---------GGTGGAACTTCTGGAAAATGCAATGAAAGAGAA---CGAAGTTGGAGGAAA +AATTCCAAGAGTGGACGGAAGCAT------------------------------------ +------------------------------------------------------------ +>gnl_Sorbi1.4_PACid_1968370 +---------ATGGACGCCGGGGGA------------------------------------ +------------------------------------------------------------ +---------------GAGAAGTTCAGCGACGCGGCGGCGGCGGAGGGCGGTGAGGGCGGC +GGCGACCTCTACGCCGTCCTCGGGCTCAAGAAGGAGTGCTCCGACGCCGACCTCAAGGTC +GCTTACCGGAAGCTCGCCAAGAAATGGCACCCGGACAAATGCTCCTCCTCCAGC------ +---AGCGTGAAACACATGGAGGAAGCCAAGGAGAAGTTCCAAGAGATCCAGGGCGCCTAT 
+TCC------------------GTACTCTCTGACGCCAATAAACGGCTCCTCTACGATGTT +GGAGTATACGACGATGAGGAC---------------GACGAGGATAGCATGCAGGGGATG +GGTGACTTCATTGGTGAGATGGCCCAGATGATGAGCCAGGTGCGGCCGACGAGG------ +------CAGGAAAGCTTTGAGGAGCTGCAG------------------CAGCTTTTTGTG +GACATGTTCCAGTCTGATATT------GATTCAGGA------------------------ +---TTCTGCAACGGGTCTGCTAAG---GATCAAGTTCAGGGGCAAGCCAAAAGTAGAACA +TGCTCGACCTCACCTTCATCATCACCGTCCCCACCTCCTCCT------------------ +------------------------------CCTACTATAGTAAAGGAGGCAGAGGTGTCA +TCA------------------TGTAATGGCTTCAATAAGCGGGGTTCATCA---GCAATG +GACTCAGGGAAG---CCTCCAAGGCCT------GTTGAAGGCGGTGCTGGTCAG------ +------------GCTGGATTTTGTTTTGGG---GTGAGC------GATACGAAGCAAACG +CCGAAGCCGAGAGGTCCGAAC------------------------ACCAGCCGGAGGAGG +AACGGCCGGAAACAGAAGCTGTCATCC---AAGCACGATGTTTCATCTGAAGATGAAACG +GCCGGTTCC--------------------------------------------------- +>gnl_Thepa2.0_Tp3g12470 +---------ATGGCGTCGAACAAT------------------------------------ +------------------------------------------------------------ +---------------------------------------------AGCGAGAAAGGAAAC +GATGATTTGTATGGTGTTCTGGGCTTGAAGAAGGAATGTACGACGACGGAGCTCCGTACT +GCTTATAAGAAGCTTGCTCTAAGATGGCATCCAGATCGTTGTTCGTCAATGGGG------ +---ACTCCAGAGTTTGTAGACGAAGCAAAGAAGAAGTTTCAGGCAATCCAAGAGGCCTAT +TCT------------------GTTCTGTCTGACTCCAACAAGAGGTTCCTCTATGATGTT +GGAGCTTATAACAGT---------------------GATGATGAAGACCAAAACGGTATG +GGAGATTTCTTGAACGAAATGGCGGCAATGATGAATCAGTCCAAGCCTAGTGAGAATAAC +---TCAGGGGACAGTTTTGAGCAGCTACAA------------------GATCTGTTTAAT +GAGATGTTTCAAGGAGACGCT------GCAGCA--------------------------- +---TTCTCATCATCATCATCA--------------------------------------- +------------------TCATCTTGCTCTGCTTCGACTTTT------------------ +------------------------------------ACTTCCTCTTGTAGCTTTGTCTTT +GAC------------------------ACAAATAGTCAGCGGTCACCGTTT---GAGACA +AGCTCAATGGGG---------ACTAATGATCTTTTTGGATTCGATCACAGTGCT------ +------------CACACCTTCTCTTTAGGG---GTGGAA------CATCAGCAGGATTTC 
+AAGAAGGGGAAGAAC------------------------------AGTGGTGGAAGAAGA +AACAGAAGGAAGAACAATGCTCAATCGGCTGCTCACGAGACGGCGTCGTCCAACAACTAT +GGAGTCCCCACTTCA--------------------------------------------- +>gnl_Theca1.0_Tc06_g010450 +---------ATGGCAAATGGA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAAGAGAAAAAC +AATGATTTTTATGCAGTTTTGGGGTTGAATAAAGAATGCACTCCGACAGAGCTCAGGACT +GCTTATAAGAAACTTGCCCTGAGATGGCACCCTGATCGTTGCTCAGCTTCGGGA------ +---AATTCAAAGTTCGTGGAAGAAGCCAAGAAGAAATTTCAGGCCATTCAACAAGCCTAT +TCT------------------GTTCTGTCTGACTCAAACAAGAGGTTTCTGTACGACGTA +GGAGCTTATGACAGT------------------------GATGATGACGAAAATGGAATG +GGAGATTTTTTGAACGAAATGGCAGGGATGATGAGCCAGACAAAATCTAATGAAAATGGA +------GGGGAAAGCTTCGAGGAACTACAG------------------GAATTGTTTGAA +GAAATGTTCCAAGCGGACATT------GATTCA--------------------------- +---TTTGAGTCTACTGGTCAG--------------------------------------- +------------TCCACTCCTTCCTGCTCTGCTTCATCTTCG------------------ +------------------------------------TTTGGGTCATATGGTGAAAGTTCC +AGC---------------------------TCCAACAAGCGGAATTCCTCT---GAAATG +AGTTCTGTGGAG---ACTAGGCTGGAGAGTTCTTCTAGCTTCGATGCACAATTT------ +------------CACAGTTTTTGTCTCGGG---GTGGAA------CACAGGCAAGATATC +AAGCAACACAGAGGAGCCAGAGGAGGAATGCGAGGAGCAGCCGGCGGTAGTAGACGGAGA +AATGGCAGGAAACAAAAGGTTTCATCT---GGCCATGATGTTACTTCCAACGAC---TGT +GGCATTTCTGCTTCA--------------------------------------------- +>gnl_Vitvi12X_PACid_17827068 +---------ATGGCCGCCGGA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GAAGAGAAGAGC +AATGATTTTTATGCCGTTCTAGGGTTGAAAAAGGAATGCACCGCCTCCGAGCTCAGAAAT +GCGTACAAGAGACTTGCCCTGATGTGGCACCCAGATCGTTGCTCCTCGTCGGGA------ +---AACTCGAAATTCGTGGAAGAAGCGAAGAAGAAATTTCAGGCCATACAAGAAGCCTAT +TCA------------------GTTCTCTCTGATGCGAATAAAAGGTTTCTGTACGACGTT +GGAGCCTACGACAGC------------------------GATGATGACGAAAACGGAATG 
+GGGGATTTTTTGAATGAGATGGCGGTTATGATGAGCCAAACCAAGTCCAATGAAAATGGG +------AAGGAGAGCTTTGAGGAGTTGCAG------------------GAGCTCTTTGAG +GATATGTTCCAAAGGGATGTC------GACGCA--------------------------- +---TTCAACTCTGCCTCTCATCAC------------------------------------ +------------CCCATGAACTCTTTCCCCAGTTCTACTTCC------------------ +------------------------------------ACTTCTTCCTACTGCGAAAGCTCC +AAT------------------------GCCAACAACAAGCGGAATTCGGCT---GAAATG +GGCTCTGGAAGGATGATGAGTGCAGGGGAGTCCTCTGCTTTTGATGCCCACTTT------ +------------CAGAGCTTCTGCTTTGGG---ACA------------------------ +---------GGCGGCACGCCAGGGAGATTTCAGGAGGGGGAA---AGGAGCAAGAGGAGG +AATTCCAGGAGGAGCCAACGG--------------------------------------- +------------------------------------------------------------ +>gnl_Selmo1.0_PACid_15401289 +---------ATG------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------GAGAAGAGGAAA +GAGGATCCCTACACTGTTCTTGGTGTCCAAAAGTCGAGTTCTAGCTCGGAAATTCGCTCC +GCTTATCGGAAGCTCGCCATGAAATGGCATCCAGATAAG--------------------- +---CAACACTCTTTAGAGGATCAAGCAAAAGCGAAGTTCCAGGGCATTCAAGAAGCTTAT +TCA------------------GTGCTATCCGACGACAAAAAAAGAGTTCTTTATGATTCG +GGACTTTATGACGAGGGA------------------GATGACGAG--------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------------------GTGAGT--------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +>gnl_Orysa6.0_PACid_16864430 +---------ATGGCCCGCGGCGGCGGC--------------------------------- +------------------------------------------------------------ +------------GGC------------------------------GGCGGCGGCGCGGAC +GCCGACCTGTACGCCGTCCTCGGCCTCAGCAGGGAGTGCACCGACGCCGACCTCAGGCTC +GCCTACCGCAAGCTCGCCATGATATGGCATCCGGACAGGTGCTCGGTGGCCGGCGGCAGC +GCGAGCGCGGCGGGCGTCGACGAGGCCAAGGAGCGATTCCAGGAGATCCAGGGCGCCTAC +TCC------------------GTGCTCTCCGACTCCAACAAGCGCTTCCTCTACGACGTC +GGCGTCTACGACGGCAACGACGGCGACGACGACGACGACGAAGCAGATCTGTCGGGGATG +GGCGATTTCCTCGGCGAGATGGCGCAGATGATGAGCCAGGCGACGCCT------------ +------GCGGAGAGCTTCGAGGAGTTGCAG------------------CAGCTGTTCGTG +GACATGTTCCAGGACGACATC------GACGCCGGC------------------------ +---CTCTGCCAGTCGACGCCG--------------------------------------- +------------CCGCCGCCGTCATGGCCGTCGCCTCCGGCG------------------ +---------------------------------------GCCGCCAATGCACGATCGCCG +GCGGCGGCGGCGACT---TCACGCAAGGGCGTGAACAAGCGGTGCTCACCG---GCGGCG +ATGGACATGGAC---------------TCCGGTTTGAGCAGCCTGCTGGGCATT------ +------------TCGGGCTTCTGTTTCGAG---GCGCCA------TGGACGTCGCAGGAC +GCGAGCACTGCCGCCGGCGGTGGC---------------------GGCGGCAAGAGGAGA +AAGCAGAGGCCGCCGCCGGCG---------AGCCACAACGTG------------------ +------------------------------------------------------------ +>gnl_Sorbi1.4_PACid_1982925 +---------ATGGCTGCTACAAGTCAC--------------------------------- +------------------------------------------------------------ +------------TGCGGCAACATCCAGGACCAGGACGAAGAAGCTTCGGCTCCTGGCGCC +GCCGACCTCTACGCCGTGCTCGGGCTCAACAGGGAGTGCACCGACGCCGAGCTCAGGGTC +GCGTACCGGCGGCTCGCCATGATATGGCATCCGGACAGGTGCTCGGCGTCCGGC------ +AGCTCGCCGGCGCGCATGGAGGAGGCCAAGGAGCGGTTCCAGGAGATCCAGGGCGCCTAC +TCC------------------GTGCTCTCCGACTCCAACAAGCGGCTCCTCTACGACGTC +GGCGTCTACGACAGCGACGAC---------------GACGAGGCTGACCTGTCGGGGATG +GGCGACTTCCTCGGAGAGATGGCCGACATGATGAGCCAGGCCACGCCA------------ +------ACGGAGACCTTCGAGGAGCTGCAG------------------CAGGTGTTCGTG 
+GACATGTTCCAGGACGACCTGGAC---GACGCCGGC------------------------ +---TTCTTCGGCGGGCTTCCGACG---ACGGGCCGCAGGGCCCAGGCA------CCCAGC +ACCTCGCTGCCGCCGTCGGTGTCGTCGTCGCCGTTGCGGCCG------------------ +------------------------------ACGCCTGCCGCTGGAAGAAGCAAGGGTCCG +CAAGCGACGCCGTCGTCGTCGTTTAAAGGCGTCGAGAGGCGGGGTTCGACG---TCGACG +GCGAAACGGCCGAGGCCCAACGGGTCGGCGGGCCTGGAATCGGACCTGGGCCTC------ +------------TCCGGATTCTGCTTCATG---GTGAGT------AAGGAGATGAGCAAG +TCGAAGGAGAGGCAAGCGGTA---------TGGGCCAGTGAC---GACGGTGACAGGAGC +ACCGATGGCAAGCAGAGGTTGTCGACG---AGCCGCGATGTCTCCGGTGGTGGG---ATG +TCACGCTCACTGCAGGGCCAAAGCAGCAAAAACTTGTTGCAGTGTATGGCCTCTAAGTCT +>gnl_Medtr3.5_Medtr8g022310.3 +---------ATGGCTAACGAA--------------------------------------- +------------------------------------------------------------ +------------------------------------------------GGAAACAAAAGC +AATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAGGAAT +GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGG------ +---AATGTGAAGTTTGTGGAAGAAGCTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTAT +TCT------------------GTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTT +GGAGTTTACGACAGT------------------------GATGATGACGAAAATGTAAGG +CACTTGTTT--------------------------------------------------- +---------CACACCATTCATGAGTTGGGG------------------ACCCTCTTTTGC +GTTATGTTTTGTTTCTTCATTTCCTTGAGG------------------------------ +------GGAGAG------------------------------------------------ +---------------------------AAGAGAAGCAACCTT------------------ +------------------------------------AATTTAACCTTTTCACTTTCACAT +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +>gnl_Nelnu1.0_NNU_000115-RA +---------ATGGAGGTGGAC--------------------------------------- 
+------------------------------------------------------------ +---------------------------------------------TCCCATCGATCATCT +CCATCTTACTACACCATCCTTGGTGTAGATCAGAATTCCTCCGCTTCCGAGATACGCAAT +GCTTACAGGAAGCTCGCGATGCAATGGCATCCAGACAAATGGACGAAAACTCCG------ +---------TCGCTCTTAGAGAAAGCCAAGAGTAAATTCCAGCAAATCCAGGAGGCTTAT +TCGGGTGGGTTACTCGTTTTCATGTTATCGGATCAGGGGAAGAGAACACTGTATGATGTC +GGTCTGTATGACCCGGACGAT---------------GAAACGAATGACGAGGTGGGGCTT +CGCAGATTT------CATGCAGGAGATGATATCTCTCATGAACGATGT------------ +------GAAGAAACAGGAGAAGAAATACAGCTTGGAGGAACTACAGGAGATGTTAGTGGA +AATGTCACAAGGGCTGGAGTT------GAAGATGGAGAGTGCTGGTGGTGTGTGGTTGTT +AGATGGAGCTGCAGCCTCAAGGAG------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------GAGCTC +AAAGAGGGCCAG------------------------------------------------ +------------ATGGGAATCATCAGCGAG---TCCGAC------GACGCT--------- +------------------------------------------------------------ +------------------------------GGACACGACACACCTTCTCTCCCCCACTTG +CACGGTTCAGAGCTG---------------GAATTGTTAGGAAGAACCGGCTGTTGCAAT +>contig_7 +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------GAGAATGAGTGGTCTGGG +GCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGA +------ACCGGAACTTTTGAAGAACTGCAA------------------CAATTGTTCGAT +GAAATGTTTCAGAGCGACATC------GAGTCC--------------------------- +---TTCAATGGTTGTTCTTCATCA------------------------------------ +------------TCCAATGAAACATGTAGC------------------------------ 
+------------------------------------------------------------ +---------------------------AACTCGAACAAGAGGAATTCCATT---GAGTCG +AGCTCGGCTAAT------------------------------------------------ +TTCAGACCCGAAAATGGAAACGAAAGCGGC------------------------------ +---------------------------------------GAG---ATTAGCGGGAAGAAG +AATACTAGGAAAGGTAAAGGTGACGNN--------------------------------- +------------------------------------------------------------
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/38889.faa Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,14 @@ +>gnl_Mimgu1.0_PACid_17675619 +MAETKTKVLKFDAEEDVAVALAKYTAYLSEKYIKEKNSFSVVLSGGTLIDTLRKLVEFPYKDSVDWSKWLIFWVDERVVP +LDHEDSNYLLAYRGFLSKVPIPPSNIYAINDKKSPEGAADDYEERIKNLVEEKTLPISDSGFPKFDLMLLGMGPDGHVAS +LFPSHNQRYEKKRWVTFITDSPKPPPPRITFTFPVINSASDIAMVVTGAELADTTKKALGNEKHTLPPLPCTEVSAEREL +TWFLDKDAASKL +>gnl_Solly2.3_Solyc06g053200.2.1 +MATQKGKKTVLKFDSEEDVSKALAKYTAELSEKFIKQKGSFTVVLSGGSLIDTMRKLVEPPYKDSIDWSKWWIFWVDERV +VPLGHDDSNYKLASDGFLSKVPIPSSNIYAINDKESPEGAAADYEARLKQLIESKVLPLSAITGFPKFDLMLLGMGPDGH +VASLFPLHPHRHEKERLVTFITDSPKPPPPRITFTFPVINSASEIAMVVTGAELAHMVDVALGNAPPPDGIPPPCTEVSA +EEELTWFLDKDAASELQTSR +>contig_2 +XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK +WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS +QLLNYVRFDD
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/38889.faa.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,18 @@ +>gnl_Mimgu1.0_PACid_17675619 +MAETKTK--VLKFDAEEDVAVALAKYTAYLSEKYIKEKNSFSVVLSGGTLIDTLRKLVEF +PYKDSVDWSKWLIFWVDERVVPLDHEDSNYLLAYRGFLSKVPIPPSNIYAINDKKSPEGA +ADDYEERIKNLVEEKTLPISD-SGFPKFDLMLLGMGPDGHVASLFPSHNQRYEKKRWVTF +ITDSPKPPPPRITFTFPVINSASDIAMVVTGAELADTTKKALGNEKHTLP---PLPCTEV +SAERELTWFLDKDAASKL-------- +>gnl_Solly2.3_Solyc06g053200.2.1 +MATQKGKKTVLKFDSEEDVSKALAKYTAELSEKFIKQKGSFTVVLSGGSLIDTMRKLVEP +PYKDSIDWSKWWIFWVDERVVPLGHDDSNYKLASDGFLSKVPIPSSNIYAINDKESPEGA +AADYEARLKQLIESKVLPLSAITGFPKFDLMLLGMGPDGHVASLFPLHPHRHEKERLVTF +ITDSPKPPPPRITFTFPVINSASEIAMVVTGAELAHMVDVALGNAP--PPDGIPPPCTEV +SAEEELTWFLDKDAASEL----QTSR +>contig_2 +------------------------------------------------------------ +------------------------------------XLSKVPIPSNNIYAINDKKSPEDA +ADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTF +ITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEV +SAEGEVTWFLDKDAASQLLNYVRFDD
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/38889.fna Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,30 @@ +>gnl_Mimgu1.0_PACid_17675619 +ATGGCCGAAACCAAAACAAAAGTACTGAAATTCGACGCGGAGGAAGATGTGGCCGTCGCTCTAGCAAAGTACACCGCCTA +TCTCTCCGAAAAGTACATCAAGGAAAAGAATTCTTTCTCGGTGGTTCTCTCCGGCGGCACCCTAATAGATACACTCAGGA +AACTAGTAGAGTTTCCATACAAGGATTCTGTGGATTGGTCGAAATGGCTGATATTTTGGGTTGACGAGAGAGTGGTTCCT +CTTGATCATGAAGACAGCAACTACTTACTTGCATACCGTGGTTTTCTTTCAAAGGTACCTATTCCTCCAAGCAACATTTA +CGCAATCAACGACAAGAAGTCTCCGGAAGGTGCAGCCGATGATTACGAGGAGCGTATCAAGAATCTGGTCGAGGAAAAAA +CCCTACCTATTTCAGACAGTGGCTTCCCTAAATTCGACCTTATGCTTCTCGGAATGGGGCCCGATGGCCACGTGGCGTCT +CTTTTCCCCTCTCACAATCAACGGTACGAAAAGAAACGGTGGGTGACATTCATAACTGACTCTCCCAAACCGCCGCCACC +TAGGATCACTTTCACATTCCCAGTCATCAACTCTGCTTCGGACATTGCAATGGTGGTCACTGGTGCTGAGCTGGCGGATA +CTACGAAGAAAGCATTGGGAAACGAGAAGCATACTCTTCCTCCTCTTCCTTGTACTGAAGTTTCGGCTGAGAGAGAGCTC +ACTTGGTTCTTGGACAAAGATGCTGCTTCTAAACTG +>gnl_Solly2.3_Solyc06g053200.2.1 +ATGGCAACCCAGAAAGGGAAGAAGACGGTGCTAAAATTCGACTCCGAAGAAGATGTATCAAAGGCACTTGCTAAATACAC +TGCTGAGCTATCGGAAAAATTCATCAAACAAAAAGGTTCTTTCACTGTTGTGCTCTCTGGTGGTTCTCTTATCGATACCA +TGAGGAAATTGGTAGAGCCGCCGTACAAAGACTCAATTGATTGGTCGAAATGGTGGATTTTTTGGGTAGACGAAAGAGTG +GTTCCTCTAGGTCACGATGATAGCAATTATAAACTTGCTTCGGATGGGTTTCTTTCTAAGGTTCCGATCCCCTCTTCTAA +CATTTATGCGATTAATGACAAGGAGTCACCTGAGGGTGCAGCTGCTGATTACGAAGCTCGTCTGAAACAATTGATTGAGA +GCAAAGTTCTTCCGTTATCAGCAATTACTGGATTCCCCAAATTTGATCTTATGCTATTAGGTATGGGGCCAGATGGACAT +GTAGCGTCTTTGTTTCCTTTGCATCCTCACCGCCACGAGAAGGAGCGGCTGGTCACCTTCATTACAGACTCACCAAAACC +TCCTCCACCAAGGATTACTTTCACCTTTCCGGTAATTAATTCGGCTTCAGAGATAGCAATGGTGGTCACAGGAGCAGAGT +TAGCTCATATGGTTGATGTCGCTTTGGGTAATGCGCCTCCTCCTGATGGAATTCCTCCCCCTTGTACTGAGGTTTCAGCT +GAAGAGGAACTGACCTGGTTTTTAGACAAGGATGCTGCATCAGAACTACAGACCTCTAGA +>contig_2 +NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA +CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC 
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA +TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC +GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC +CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA +CAACTGTTAAATTATGTGCGCTTTGATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/38889.fna.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,45 @@ +>gnl_Mimgu1.0_PACid_17675619 +ATGGCCGAAACCAAAACAAAA------GTACTGAAATTCGACGCGGAGGAAGATGTGGCC +GTCGCTCTAGCAAAGTACACCGCCTATCTCTCCGAAAAGTACATCAAGGAAAAGAATTCT +TTCTCGGTGGTTCTCTCCGGCGGCACCCTAATAGATACACTCAGGAAACTAGTAGAGTTT +CCATACAAGGATTCTGTGGATTGGTCGAAATGGCTGATATTTTGGGTTGACGAGAGAGTG +GTTCCTCTTGATCATGAAGACAGCAACTACTTACTTGCATACCGTGGTTTTCTTTCAAAG +GTACCTATTCCTCCAAGCAACATTTACGCAATCAACGACAAGAAGTCTCCGGAAGGTGCA +GCCGATGATTACGAGGAGCGTATCAAGAATCTGGTCGAGGAAAAAACCCTACCTATTTCA +GAC---AGTGGCTTCCCTAAATTCGACCTTATGCTTCTCGGAATGGGGCCCGATGGCCAC +GTGGCGTCTCTTTTCCCCTCTCACAATCAACGGTACGAAAAGAAACGGTGGGTGACATTC +ATAACTGACTCTCCCAAACCGCCGCCACCTAGGATCACTTTCACATTCCCAGTCATCAAC +TCTGCTTCGGACATTGCAATGGTGGTCACTGGTGCTGAGCTGGCGGATACTACGAAGAAA +GCATTGGGAAACGAGAAGCATACTCTTCCT---------CCTCTTCCTTGTACTGAAGTT +TCGGCTGAGAGAGAGCTCACTTGGTTCTTGGACAAAGATGCTGCTTCTAAACTG------ +------------------ +>gnl_Solly2.3_Solyc06g053200.2.1 +ATGGCAACCCAGAAAGGGAAGAAGACGGTGCTAAAATTCGACTCCGAAGAAGATGTATCA +AAGGCACTTGCTAAATACACTGCTGAGCTATCGGAAAAATTCATCAAACAAAAAGGTTCT +TTCACTGTTGTGCTCTCTGGTGGTTCTCTTATCGATACCATGAGGAAATTGGTAGAGCCG +CCGTACAAAGACTCAATTGATTGGTCGAAATGGTGGATTTTTTGGGTAGACGAAAGAGTG +GTTCCTCTAGGTCACGATGATAGCAATTATAAACTTGCTTCGGATGGGTTTCTTTCTAAG +GTTCCGATCCCCTCTTCTAACATTTATGCGATTAATGACAAGGAGTCACCTGAGGGTGCA +GCTGCTGATTACGAAGCTCGTCTGAAACAATTGATTGAGAGCAAAGTTCTTCCGTTATCA +GCAATTACTGGATTCCCCAAATTTGATCTTATGCTATTAGGTATGGGGCCAGATGGACAT +GTAGCGTCTTTGTTTCCTTTGCATCCTCACCGCCACGAGAAGGAGCGGCTGGTCACCTTC +ATTACAGACTCACCAAAACCTCCTCCACCAAGGATTACTTTCACCTTTCCGGTAATTAAT +TCGGCTTCAGAGATAGCAATGGTGGTCACAGGAGCAGAGTTAGCTCATATGGTTGATGTC +GCTTTGGGTAATGCGCCT------CCTCCTGATGGAATTCCTCCCCCTTGTACTGAGGTT +TCAGCTGAAGAGGAACTGACCTGGTTTTTAGACAAGGATGCTGCATCAGAACTA------ +------CAGACCTCTAGA +>contig_2 +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------NNCCTTTCCAAG +GTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCA +GCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCA +ACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCAT +GTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTC +ATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAAC +TCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATA +GCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTT +TCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAAT +TATGTGCGCTTTGATGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/39614.faa Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,19 @@ +>gnl_Orysa6.0_PACid_16878968 +MSHMEAFQNVVLLHKANSNSTLEDISSLSAASCCSLDQLLACVEGEAQKIFGDIQNLLADHRSEVAHFTQELRESFRISL +DRTKDMSSFILGLFDKYVEETSKLQSHSNHTHEAQVKSLEDFQKAYEEQSKSEEQKLLADITSLVSKHVTRQRELVGGRL +NSLGDAARGNKAFLDEHTSAMEVVTKDAKRKWEMFAEQAENDCKVGSNFSAAKHCRMETILQECACTVDTAAQQWKASHA +TVNDLCRKQIAEVEALVRSAIETNEQHEAEIASSRATAEEHASNSSKDLLQDVDNMLQEARNSSSRVVSTVEAHLGESQH +LQESHSSHTAGINTHADNAFQSSYKDYEPTGETPVRSEPEVPSKDAIESLRAMPMESLMDEFRENHPYEPSKDRRPSLIP +RSPLATINN +>gnl_Phoda3.0_PDK_30s1023721g001 +VNQKMMKCTLIKDLYGEIERLKAEVYAAREKVGVYIPKERYHQEESERKAMAEQIEQMGVLLENNQKQIEDLQERYNTQL +QQSDDLSKKLDATEILCVSLSKKLDATEKSLEHTSKLLAAAREDLKQAQYTLKEKDFVISEQRKAAREDKLNTANRSIVN +NFRADLATRVGTLCNTVVASLDRQNEHLQSVEKLCQSSLDFHDKAVSELKRKVSASRALYTSHMEALQNVVRLHKASSNA +SLEEMSSMISANTCSLDQLLALGQSEADLIFSDLQSILSIHRGEIANFTRELREKFQVNLDRTKEMSNFILELLEKIGKG +TKEFQNDSTLVHEAQVKSIGDFQKAYEVEVRLTGLGDAARDSKAIMDNHASSMDIVTTDAKRKWEEYSKQAEQDSEDGSN +FSAAKHCRMELMLQQCVNSVDATSQQWKKTHASVSEMSSKHVAEIEALVRSAIESNDQHDAEVASARMAAEEDVAKNSKD +VLQHFDTVIDHERNSAAGVMAAVEAHSATLHKLQEEQSSQATEINSHAEDTFQNTYMDYEPTGETPTRSEPDIPSRGTIE +SLRAMPIEALLEEFRENHPYESKEPKPSLIPRSPLVQLN +>contig_3 +XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG +TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/39614.faa.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,36 @@ +>gnl_Orysa6.0_PACid_16878968 +M----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +-----------------------------------------SHMEAFQNVVLLHKANSNS +TLEDISSLSAASCCSLDQLLACVEGEAQKIFGDIQNLLADHRSEVAHFTQELRESFRISL +DRTKDMSSFILGLFDKYVEETSKLQSHSNHTHEAQVKSLEDFQKAYEEQSKSEEQKLLAD +ITSLVSKHVTRQRELVGGRLNSLGDAARGNKAFLDEHTSAMEVVTKDAKRKWEMFAEQAE +NDCKVGSNFSAAKHCRMETILQECACTVDTAAQQWKASHATVNDLCRKQIAEVEALVRSA +IETNEQHEAEIASSRATAEEHASNSSKDLLQDVDNMLQEARNSSSRVVSTVEAHLGESQH +LQESHSSHTAGINTHADNAFQSSYKDYEPTGETPVRSEPEVPSKDAIESLRAMPMESLMD +EFRENHPYEPS---KDRRPSLIPRSPLATINN +>gnl_Phoda3.0_PDK_30s1023721g001 +VNQKMMKCTLIKDLYGEIERLKAEVYAAREKVGVYIPKERYHQEESERKAMAEQIEQMGV +LLENNQKQIEDLQERYNTQLQQSDDLSKKLDATEILCVSLSKKLDATEKSLEHTSKLLAA +AREDLKQAQYTLKEKDFVISEQRKAAREDKLNTANRSIVNNFRADLATRVGTLCNTVVAS +LDRQNEHLQSVEKLCQSSLDFHDKAVSELKRKVSASRALYTSHMEALQNVVRLHKASSNA +SLEEMSSMISANTCSLDQLLALGQSEADLIFSDLQSILSIHRGEIANFTRELREKFQVNL +DRTKEMSNFILELLEKIGKGTKEFQNDSTLVHEAQVKSIGDFQKAYE------------- +---------------VEVRLTGLGDAARDSKAIMDNHASSMDIVTTDAKRKWEEYSKQAE +QDSEDGSNFSAAKHCRMELMLQQCVNSVDATSQQWKKTHASVSEMSSKHVAEIEALVRSA +IESNDQHDAEVASARMAAEEDVAKNSKDVLQHFDTVIDHERNSAAGVMAAVEAHSATLHK +LQEEQSSQATEINSHAEDTFQNTYMDYEPTGETPTRSEPDIPSRGTIESLRAMPIEALLE +EFRENHPYES----KEPKPSLIPRSPLVQLN- +>contig_3 +X----------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+---------------------------VD------------------------EGVV--- +--------------------------------VAGLSEQEKASVSEILTTARAHSETIEN +LKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEE +EFRENHSYESAVTGKELMPSVTTRAPFSQIN-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/39614.fna Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,47 @@ +>gnl_Orysa6.0_PACid_16878968 +ATGTCACATATGGAAGCCTTCCAAAATGTTGTGCTCCTGCATAAAGCAAATTCAAATTCTACACTAGAGGATATATCATC +CCTATCTGCTGCAAGCTGTTGCAGCCTTGATCAGCTTCTAGCTTGTGTCGAGGGAGAGGCACAGAAGATATTTGGTGATA +TCCAGAATTTGCTAGCTGATCATCGAAGCGAAGTGGCACATTTCACTCAAGAGTTGCGGGAGAGTTTCCGCATTAGCTTG +GATAGGACGAAGGACATGTCTAGTTTCATCCTTGGGTTGTTCGATAAGTATGTGGAGGAAACTTCGAAGTTGCAGAGCCA +CTCCAATCACACACATGAAGCACAAGTCAAAAGCCTTGAAGATTTCCAGAAGGCTTATGAGGAGCAATCAAAATCAGAAG +AACAAAAGCTTCTGGCGGACATCACCAGTTTGGTTTCTAAACACGTTACTCGACAACGAGAACTGGTGGGTGGTAGACTA +AACTCTCTTGGTGACGCCGCTCGTGGAAACAAAGCATTTTTGGATGAGCACACGTCCGCCATGGAGGTGGTCACGAAGGA +CGCCAAGAGAAAGTGGGAAATGTTTGCAGAGCAGGCAGAGAATGACTGCAAAGTTGGGTCCAACTTCTCTGCAGCTAAGC +ATTGTCGCATGGAAACCATTCTGCAGGAATGTGCATGCACCGTCGACACTGCTGCTCAACAATGGAAAGCATCACATGCA +ACTGTTAACGATCTATGCAGAAAACAAATAGCTGAAGTTGAAGCACTCGTCAGGAGTGCAATCGAAACCAACGAGCAGCA +CGAAGCAGAGATTGCATCTTCCCGTGCCACGGCCGAGGAGCATGCGTCCAACAGCAGCAAGGACCTACTCCAAGATGTTG +ACAATATGCTGCAGGAGGCGCGCAATTCGTCGTCGAGAGTGGTGTCGACGGTGGAAGCTCATTTGGGAGAGAGCCAGCAT +CTACAGGAGAGCCACTCCAGCCATACCGCCGGCATCAACACCCACGCCGACAACGCTTTCCAGAGCAGCTACAAGGACTA +CGAGCCGACCGGCGAAACTCCGGTGAGGTCGGAGCCGGAGGTGCCGAGCAAAGACGCGATCGAGTCGCTGCGAGCGATGC +CGATGGAGTCCCTGATGGACGAGTTCCGCGAGAACCACCCCTACGAGCCGAGCAAGGACCGCAGGCCATCGCTCATCCCT +CGCTCGCCGCTCGCCACCATCAACAAC +>gnl_Phoda3.0_PDK_30s1023721g001 +GTAAACCAAAAAATGATGAAATGTACATTAATCAAAGATCTCTATGGAGAAATTGAGCGTCTAAAAGCAGAGGTGTATGC +TGCTCGTGAGAAAGTTGGAGTTTACATACCAAAAGAACGCTACCATCAAGAAGAGAGCGAACGGAAGGCAATGGCAGAAC +AAATTGAACAAATGGGGGTCTTGCTCGAAAACAATCAAAAGCAAATTGAGGATCTACAAGAAAGGTATAATACTCAACTT +CAACAGTCTGATGACCTGAGCAAAAAGCTTGATGCCACCGAGATTCTCTGTGTTTCTCTGAGCAAAAAGCTTGATGCCAC +CGAGAAAAGTTTGGAGCACACTAGCAAGTTATTGGCTGCTGCCAGAGAAGATCTGAAGCAAGCTCAGTATACTCTGAAGG +AGAAAGATTTTGTTATATCAGAGCAGAGGAAAGCAGCTAGAGAAGACAAACTGAATACTGCCAACAGATCTATTGTGAAC 
+AATTTTCGGGCTGATCTTGCAACAAGGGTTGGAACACTTTGTAATACTGTTGTTGCATCCTTGGATCGGCAAAATGAACA +CCTTCAGTCTGTTGAGAAACTATGTCAATCTAGCCTTGATTTCCATGACAAGGCAGTATCAGAGCTGAAAAGGAAAGTGT +CAGCTTCAAGAGCTTTGTATACTTCCCATATGGAAGCACTACAAAATGTAGTGCGTTTGCATAAGGCAAGCAGCAATGCC +AGCTTAGAAGAGATGTCATCCATGATTTCTGCCAATACCTGCTCTCTTGATCAGTTACTTGCCTTGGGGCAAAGCGAAGC +AGATCTGATTTTTAGTGATCTGCAAAGCATATTGTCAATTCACCGAGGAGAGATTGCAAATTTCACCCGTGAACTTCGTG +AGAAATTTCAAGTTAATTTGGATCGGACAAAGGAGATGTCCAATTTTATTCTTGAGCTGCTTGAAAAGATAGGGAAGGGA +ACAAAAGAATTTCAGAATGACTCAACTTTGGTACATGAGGCTCAGGTGAAGAGCATTGGTGATTTCCAAAAGGCATATGA +GGTGGAAGTGAGGCTCACTGGACTGGGAGATGCTGCTCGAGATAGCAAAGCAATTATGGATAACCATGCATCATCAATGG +ACATCGTCACAACTGATGCTAAGAGGAAGTGGGAAGAATATTCCAAGCAGGCAGAGCAAGATTCAGAGGACGGTTCTAAC +TTTTCAGCAGCAAAACATTGTCGCATGGAACTCATGCTCCAACAGTGTGTAAACTCTGTTGATGCTACTTCTCAACAGTG +GAAGAAGACACATGCATCTGTTAGTGAGATGAGCAGCAAACACGTTGCTGAAATTGAAGCACTTGTAAGGAGTGCCATTG +AGAGCAATGATCAGCATGATGCTGAGGTTGCTTCAGCAAGAATGGCAGCAGAAGAGGATGTAGCGAAAAATAGCAAAGAT +GTTCTTCAGCATTTTGATACTGTGATTGATCATGAGCGCAACTCAGCCGCTGGAGTGATGGCAGCAGTCGAAGCTCACTC +AGCAACCCTGCATAAACTGCAAGAGGAACAATCAAGCCAGGCAACAGAGATTAATAGCCATGCGGAGGACACATTCCAAA +ACACCTACATGGACTATGAACCAACGGGAGAAACCCCAACAAGGTCGGAACCAGATATACCAAGCAGGGGAACAATCGAA +TCTCTTCGAGCCATGCCGATAGAAGCCCTCCTTGAAGAGTTCCGGGAGAACCATCCATACGAGTCCAAGGAGCCCAAACC +GTCTCTCATACCACGCTCTCCACTCGTCCAGCTCAAC +>contig_3 +NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG +AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA +CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC +ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT +TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/39614.fna.aln Mon Oct 30 09:52:00 2017 -0400 @@ -0,0 +1,99 @@ +>gnl_Orysa6.0_PACid_16878968 +ATG--------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---TCACATATGGAAGCCTTCCAAAATGTTGTGCTCCTGCATAAAGCAAATTCAAATTCT +ACACTAGAGGATATATCATCCCTATCTGCTGCAAGCTGTTGCAGCCTTGATCAGCTTCTA +GCTTGTGTCGAGGGAGAGGCACAGAAGATATTTGGTGATATCCAGAATTTGCTAGCTGAT +CATCGAAGCGAAGTGGCACATTTCACTCAAGAGTTGCGGGAGAGTTTCCGCATTAGCTTG +GATAGGACGAAGGACATGTCTAGTTTCATCCTTGGGTTGTTCGATAAGTATGTGGAGGAA +ACTTCGAAGTTGCAGAGCCACTCCAATCACACACATGAAGCACAAGTCAAAAGCCTTGAA +GATTTCCAGAAGGCTTATGAGGAGCAATCAAAATCAGAAGAACAAAAGCTTCTGGCGGAC +ATCACCAGTTTGGTTTCTAAACACGTTACTCGACAACGAGAACTGGTGGGTGGTAGACTA +AACTCTCTTGGTGACGCCGCTCGTGGAAACAAAGCATTTTTGGATGAGCACACGTCCGCC +ATGGAGGTGGTCACGAAGGACGCCAAGAGAAAGTGGGAAATGTTTGCAGAGCAGGCAGAG +AATGACTGCAAAGTTGGGTCCAACTTCTCTGCAGCTAAGCATTGTCGCATGGAAACCATT +CTGCAGGAATGTGCATGCACCGTCGACACTGCTGCTCAACAATGGAAAGCATCACATGCA +ACTGTTAACGATCTATGCAGAAAACAAATAGCTGAAGTTGAAGCACTCGTCAGGAGTGCA +ATCGAAACCAACGAGCAGCACGAAGCAGAGATTGCATCTTCCCGTGCCACGGCCGAGGAG +CATGCGTCCAACAGCAGCAAGGACCTACTCCAAGATGTTGACAATATGCTGCAGGAGGCG +CGCAATTCGTCGTCGAGAGTGGTGTCGACGGTGGAAGCTCATTTGGGAGAGAGCCAGCAT +CTACAGGAGAGCCACTCCAGCCATACCGCCGGCATCAACACCCACGCCGACAACGCTTTC +CAGAGCAGCTACAAGGACTACGAGCCGACCGGCGAAACTCCGGTGAGGTCGGAGCCGGAG 
+GTGCCGAGCAAAGACGCGATCGAGTCGCTGCGAGCGATGCCGATGGAGTCCCTGATGGAC +GAGTTCCGCGAGAACCACCCCTACGAGCCGAGC---------AAGGACCGCAGGCCATCG +CTCATCCCTCGCTCGCCGCTCGCCACCATCAACAAC +>gnl_Phoda3.0_PDK_30s1023721g001 +GTAAACCAAAAAATGATGAAATGTACATTAATCAAAGATCTCTATGGAGAAATTGAGCGT +CTAAAAGCAGAGGTGTATGCTGCTCGTGAGAAAGTTGGAGTTTACATACCAAAAGAACGC +TACCATCAAGAAGAGAGCGAACGGAAGGCAATGGCAGAACAAATTGAACAAATGGGGGTC +TTGCTCGAAAACAATCAAAAGCAAATTGAGGATCTACAAGAAAGGTATAATACTCAACTT +CAACAGTCTGATGACCTGAGCAAAAAGCTTGATGCCACCGAGATTCTCTGTGTTTCTCTG +AGCAAAAAGCTTGATGCCACCGAGAAAAGTTTGGAGCACACTAGCAAGTTATTGGCTGCT +GCCAGAGAAGATCTGAAGCAAGCTCAGTATACTCTGAAGGAGAAAGATTTTGTTATATCA +GAGCAGAGGAAAGCAGCTAGAGAAGACAAACTGAATACTGCCAACAGATCTATTGTGAAC +AATTTTCGGGCTGATCTTGCAACAAGGGTTGGAACACTTTGTAATACTGTTGTTGCATCC +TTGGATCGGCAAAATGAACACCTTCAGTCTGTTGAGAAACTATGTCAATCTAGCCTTGAT +TTCCATGACAAGGCAGTATCAGAGCTGAAAAGGAAAGTGTCAGCTTCAAGAGCTTTGTAT +ACTTCCCATATGGAAGCACTACAAAATGTAGTGCGTTTGCATAAGGCAAGCAGCAATGCC +AGCTTAGAAGAGATGTCATCCATGATTTCTGCCAATACCTGCTCTCTTGATCAGTTACTT +GCCTTGGGGCAAAGCGAAGCAGATCTGATTTTTAGTGATCTGCAAAGCATATTGTCAATT +CACCGAGGAGAGATTGCAAATTTCACCCGTGAACTTCGTGAGAAATTTCAAGTTAATTTG +GATCGGACAAAGGAGATGTCCAATTTTATTCTTGAGCTGCTTGAAAAGATAGGGAAGGGA +ACAAAAGAATTTCAGAATGACTCAACTTTGGTACATGAGGCTCAGGTGAAGAGCATTGGT +GATTTCCAAAAGGCATATGAG--------------------------------------- +---------------------------------------------GTGGAAGTGAGGCTC +ACTGGACTGGGAGATGCTGCTCGAGATAGCAAAGCAATTATGGATAACCATGCATCATCA +ATGGACATCGTCACAACTGATGCTAAGAGGAAGTGGGAAGAATATTCCAAGCAGGCAGAG +CAAGATTCAGAGGACGGTTCTAACTTTTCAGCAGCAAAACATTGTCGCATGGAACTCATG +CTCCAACAGTGTGTAAACTCTGTTGATGCTACTTCTCAACAGTGGAAGAAGACACATGCA +TCTGTTAGTGAGATGAGCAGCAAACACGTTGCTGAAATTGAAGCACTTGTAAGGAGTGCC +ATTGAGAGCAATGATCAGCATGATGCTGAGGTTGCTTCAGCAAGAATGGCAGCAGAAGAG +GATGTAGCGAAAAATAGCAAAGATGTTCTTCAGCATTTTGATACTGTGATTGATCATGAG +CGCAACTCAGCCGCTGGAGTGATGGCAGCAGTCGAAGCTCACTCAGCAACCCTGCATAAA +CTGCAAGAGGAACAATCAAGCCAGGCAACAGAGATTAATAGCCATGCGGAGGACACATTC +CAAAACACCTACATGGACTATGAACCAACGGGAGAAACCCCAACAAGGTCGGAACCAGAT 
+ATACCAAGCAGGGGAACAATCGAATCTCTTCGAGCCATGCCGATAGAAGCCCTCCTTGAA +GAGTTCCGGGAGAACCATCCATACGAGTCC------------AAGGAGCCCAAACCGTCT +CTCATACCACGCTCTCCACTCGTCCAGCTCAAC--- +>contig_3 +NNT--------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +---------------------GTGGAT--------------------------------- +---------------------------------------GAAGGAGTTGTT--------- +------------------------------------------------------------ +------------------------------------GTTGCTGGCTTGTCAGAGCAGGAG +AAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAAC +CTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTC +AGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGAT 
+ATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAA +GAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCT +GTTACGACTCGTGCACCATTTTCACAGATCAAC---
--- a/utils.py Fri Aug 25 13:01:25 2017 -0400 +++ b/utils.py Mon Oct 30 09:52:00 2017 -0400 @@ -27,7 +27,7 @@ return fstderr, fherr, fstdout, fhout -def move_directory_files(source_dir, destination_dir, copy=False): +def move_directory_files(source_dir, destination_dir, copy=False, remove_source_dir=False): source_directory = os.path.abspath(source_dir) destination_directory = os.path.abspath(destination_dir) if not os.path.isdir(destination_directory): @@ -38,6 +38,8 @@ shutil.copy(source_entry, destination_directory) else: shutil.move(source_entry, destination_directory) + if remove_source_dir: + os.rmdir(source_directory) def run_command(cmd): @@ -52,29 +54,3 @@ def stop_err(msg): sys.exit(msg) - - -def write_html_output(output, title, dir): - with open(output, 'w') as fh: - dir_items = sorted(os.listdir(dir)) - # Directories can only contain either files or directories, - # but not both. - if len(dir_items) > 0: - item_path = os.path.join(dir, dir_items[0]) - if os.path.isdir(item_path): - header = 'Directories' - else: - header = 'Datasets' - else: - header = '' - fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items))) - fh.write('<body><p/><table cellpadding="2">\n') - fh.write('<tr><b>%s</th></b>\n' % header) - for index, fname in enumerate(dir_items): - if index % 2 == 0: - bgcolor = '#D8D8D8' - else: - bgcolor = '#FFFFFF' - link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname) - fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link)) - fh.write('</table></body></html>\n')