Repository 'last'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/last

Changeset 0:9a7e91fc6562 (2020-06-17)
Next changeset 1:86206f93fb13 (2020-07-22)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/last commit c5689f5fc818d1538b2e15251c7de203c70e2219"
added:
lastal.xml
lastdb.xml
lastsplit.xml
lasttrain.xml
macros_last.xml
maf-convert.xml
test-data/fuguMito.fa
test-data/hedgdb.bck
test-data/hedgdb.des
test-data/hedgdb.lastdb
test-data/hedgdb.prj
test-data/hedgdb.sds
test-data/hedgdb.ssp
test-data/hedgdb.suf
test-data/hedgdb.tis
test-data/hedgdb.txt
test-data/hedgehog_prot_drosophila.fa
test-data/hedgehog_prot_human.fa
test-data/humanMito.fa
test-data/humdb.bck
test-data/humdb.des
test-data/humdb.lastdb
test-data/humdb.prj
test-data/humdb.sds
test-data/humdb.ssp
test-data/humdb.suf
test-data/humdb.tis
test-data/humdb.txt
test-data/last_align_gen.maf
test-data/last_align_prot.maf
test-data/last_align_train_gen.maf
test-data/last_split.maf
test-data/last_train.txt
test-data/lastdb.loc
test-data/outfile_convert.axt
test-data/outfile_convert.blast
test-data/outfile_convert.html
test-data/outfile_convert.tab
test-data/tool_data_table_conf.xml.test
tool-data/lastdb.loc.sample
tool_data_table_conf.xml.sample
b
diff -r 000000000000 -r 9a7e91fc6562 lastal.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lastal.xml Wed Jun 17 14:50:21 2020 -0400
[
b'@@ -0,0 +1,361 @@\n+<tool id="last_al" name="LASTal" version="@LAST_CONDA_VERSION@+galaxy0" profile="18.01">\n+\n+    <description>finds local alignments between query sequences, and reference sequences.</description>\n+\n+    <macros>\n+        <import>macros_last.xml</import>\n+    </macros>\n+\n+    <requirements>\n+        <requirement type="package" version="@LAST_CONDA_VERSION@">last</requirement>\n+    </requirements>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+        #if $db_opts.db_opts_input == \'lastdb\'\n+            ln -s \'$db_opts.lastdatabase.extra_files_path\' \'./db_files\' &&\n+        #end if\n+        \n+\tlastal\n+\n+        -Q $lastal.Q\n+        -f $lastal.f\n+        -j $lastal.j\n+\n+        -R $lastal.repeats_misc.uppercase_misc$lastal.repeats_misc.simple_repeat_misc\n+\n+        #if $lastal.isprotein.schoring_schemes.score_matrix != \'scoreFile\'\n+            -p $lastal.isprotein.schoring_schemes.score_matrix\n+\n+            -X $lastal.isprotein.schoring_schemes.score_opt.X\n+            -x $lastal.isprotein.schoring_schemes.score_opt.x\n+            -y $lastal.isprotein.schoring_schemes.score_opt.y\n+            -z $lastal.isprotein.schoring_schemes.score_opt.z\n+            -d $lastal.isprotein.schoring_schemes.score_opt.d\n+            -e $lastal.isprotein.schoring_schemes.score_opt.e\n+\n+            -a $lastal.isprotein.schoring_schemes.cost_opt.a\n+            -b $lastal.isprotein.schoring_schemes.cost_opt.b\n+            -A $lastal.isprotein.schoring_schemes.cost_opt.A\n+            -B $lastal.isprotein.schoring_schemes.cost_opt.B\n+\n+            -s $lastal.isprotein.s\n+        #else if $lastal.isprotein.schoring_schemes.score_matrix == \'scoreFile\'\n+            -p $lastal.isprotein.schoring_schemes.scoreMatrixFile\n+        #end if\n+\n+        -D $lastal.evalue_opt.D\n+\n+        -m $lastal.init_match_opt.m\n+        -l $lastal.init_match_opt.l\n+        -L $lastal.init_match_opt.L\n+        -k 
$lastal.init_match_opt.k\n+        -W $lastal.init_match_opt.W\n+\n+        -S $lastal.misc_opt.S\n+        -K $lastal.misc_opt.K\n+        -C $lastal.misc_opt.C\n+        -T $lastal.misc_opt.T\n+        -n $lastal.misc_opt.n\n+        -w $lastal.misc_opt.w\n+        -u $lastal.misc_opt.u\n+        -g $lastal.misc_opt.g\n+\n+        -P \\${GALAXY_SLOTS:-1}\n+\n+        #if $db_opts.db_opts_input == \'db\'\n+            \'${"\'" "\'".join(str($db_opts.database.fields.path).split(","))}\'\n+        #else if $db_opts.db_opts_input == \'lastdb\'\n+            \'db_files/lastdb\'\n+        #end if\n+\n+        \'$query_fasta\'\n+\n+        >\'$outfile\'\n+    ]]></command>\n+\n+    <inputs>\n+        <expand macro="input_db"/>\n+        <param name="query_fasta" type="data" format="FASTA" label="Queries fasta file"/>\n+\n+        <section name="lastal" title="Lastal arguments" expanded="true">\n+            <param argument="-Q" type="select" multiple="false" label="Input format">\n+                <option value="0" selected="true">fasta or fastq-ignore</option>\n+                <option value="1">fastq-sanger</option>\n+                <option value="2">fastq-solexa</option>\n+                <option value="3">fastq-illumina</option>\n+                <option value="4">prb</option>\n+                <option value="5">PSSM</option>\n+            </param>            \n+            <param argument="-f" type="select" multiple="false" label="Output format">\n+                <option value="MAF" selected="true">MAF</option>\n+                <option value="TAB">TAB</option>\n+                <option value="BlastTab">BlastTab</option>\n+                <option value="BlastTab+">BlastTab+</option>\n+            </param>\n+            <param argument="-j" type="select" multiple="false" label="Output type">\n+                <option value="0">Match counts</option>\n+                <option value="1">Gapless</option>\n+                <option value="2">Redundant gapped</option>\n+ 
               <option value="3" selected="true">Gapped</option>\n+                <option value="4">Column ambiguity estimates</option>\n+   '..b' off"/>\n+\n+                <!-- https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi -->\n+                <!-- Condition if ADN vs prot and -F -->\n+                <param argument="-G" type="select" multiple="false" label="Genetic code.">\n+                    <option value="1" selected="true">Standard Code</option>\n+                    <option value="2">Vertebrate Mitochondrial Code</option>\n+                    <option value="3">Yeast Mitochondrial Code</option>\n+                    <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma</option>\n+                    <option value="5">Invertebrate Mitochondrial Code</option>\n+                    <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option>\n+                    <option value="9">Echinoderm and Flatworm Mitochondrial Code</option>\n+                    <option value="10">Euplotid Nuclear Code</option>\n+                    <option value="11">Bacterial, Archaeal and Plant Plastid Code</option>\n+                    <option value="12">Alternative Yeast Nuclear Code</option>\n+                    <option value="13">Ascidian Mitochondrial Code</option>\n+                    <option value="14">Alternative Flatworm Mitochondrial Code</option>\n+                    <option value="16">Chlorophycean Mitochondrial Code</option>\n+                    <option value="21">Trematode Mitochondrial Code</option>\n+                    <option value="22">Scenedesmus obliquus Mitochondrial Code</option>\n+                    <option value="23">Thraustochytrium Mitochondrial Code</option>\n+                    <option value="24">Pterabranchia Mitochondrial Code</option>\n+                    <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>\n+                    <option 
value="26">Pachysolen tannophilus Nuclear Code</option>\n+                    <option value="27">Karyorelict Nuclear Code</option>\n+                    <option value="28">Condylostoma Nuclear Code</option>\n+                    <option value="29">Mesodinium Nuclear Code</option>\n+                    <option value="30">Peritrich Nuclear Code</option>\n+                    <option value="31">Blastocrithidia Nuclear Code</option>\n+                    <option value="33">Cephalodiscidae Mitochondrial UAA-Tyr Code</option>\n+                    <!-- Add filename option with <repeat> -->\n+                </param>\n+\n+                <!--<param name="temperature" argument="-t" type="integer" value="1/lambda" label="Temperature for calculating probabilities"/>-->\n+                <param argument="-g" type="integer" value="1" min="0" label="Gamma parameter for gamma-centroid and LAMA"/>\n+            </section>\n+        </section>\n+    </inputs>\n+\n+    <outputs>\n+        <data name="outfile" format="maf" label="LAST align from ${on_string}" />\n+    </outputs>\n+\n+    <tests>\n+        <test>\n+            <conditional name="db_opts">\n+                <param name="db_opts_input" value="db"/>\n+                <param name="database" value="humdb"/>\n+            </conditional>\n+            <param name="query_fasta" value="fuguMito.fa" ftype="fasta"/>\n+            <output name="outfile" ftype="maf" file="last_align_gen.maf" lines_diff="2"/>\n+        </test>\n+        <test>\n+            <conditional name="db_opts">\n+                <param name="db_opts_input" value="db"/>\n+                <param name="database" value="hedgdb"/>\n+            </conditional>\n+            <section name="lastal">\n+                <conditional name="isprotein">\n+                    <param name="lastal_protein" value="prot"/>\n+                </conditional>\n+            </section>\n+            <param name="query_fasta" value="hedgehog_prot_drosophila.fa" ftype="fasta"/>\n+   
         <output name="outfile" ftype="maf" file="last_align_prot.maf" lines_diff="2"/>\n+        </test>\n+    </tests>\n+\n+    <help>@LAST_HELP@</help>\n+    <citations><expand macro="citations"/></citations>\n+</tool>\n'
b
diff -r 000000000000 -r 9a7e91fc6562 lastdb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lastdb.xml Wed Jun 17 14:50:21 2020 -0400
[
b'@@ -0,0 +1,174 @@\n+<tool id="last_db" name="LASTdb" version="@LAST_CONDA_VERSION@+galaxy0" profile="18.01">\n+\n+    <description>prepares sequences for subsequent comparison and alignment using lastal.</description>\n+\n+    <macros>\n+        <import>macros_last.xml</import>\n+    </macros>\n+\n+    <requirements>\n+        <requirement type="package" version="@LAST_CONDA_VERSION@">last</requirement>\n+    </requirements>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+        lastdb \n+        -Q $lastdb.Q\n+        $lastdb.sequences_type.protein \n+        -a $lastdb.sequences_type.a\n+        -R $lastdb.repeats.uppercase$lastdb.repeats.simple_repeat \n+        $lastdb.repeats.sm_lower \n+        -S $lastdb.lastdb_advanced.S\n+        -u $lastdb.lastdb_advanced.seeds.u\n+        -w $lastdb.lastdb_advanced.seeds.w\n+        -W $lastdb.lastdb_advanced.seeds.W \n+        -m $lastdb.lastdb_advanced.seeds.m \n+        -i $lastdb.lastdb_advanced.i \n+        -C $lastdb.lastdb_advanced.C \n+        -s \\${GALAXY_MEMORY_MB:-2G}\n+        -P \\${GALAXY_SLOTS:-1}        \n+        \'lastdb\' \n+        \'$input_files\'\n+\n+        &&\n+\n+        mkdir \'$outfile.files_path\' &&\n+        mv lastdb.* \'$outfile.files_path\'\n+    ]]></command>\n+\n+    <inputs>\n+        <param name="input_files" type="data" format="FASTA" label="Reference(s) input files" />\n+        <section name="lastdb" title="Lastdb arguments" expanded="true">\n+            <param argument="-Q" type="select" multiple="false" label="Input format">\n+                <option value="0" selected="true">fasta or fastq-ignore</option>\n+                <option value="1">fastq-sanger</option>\n+                <option value="2">fastq-solexa</option>\n+                <option value="3">fastq-illumina</option>\n+            </param>\n+\n+            <conditional name="sequences_type">\n+                <param name="protein" type="select" multiple="false" label="The sequences are :">\n+             
       <option value="" selected="true">DNA</option>\n+                    <option value="-p">Proteins (-p)</option>\n+                </param>\n+                <when value="">\n+                    <param argument="-a" type="text" value="ACGT" label="User-defined alphabet."/>\n+                </when>\n+                <when value="-p">\n+                    <param argument="-a" type="text" value="ACDEFGHIKLMNPQRSTVWY" label="User-defined alphabet."/>\n+                </when>\n+            </conditional>\n+\n+            <section name="repeats" title="Specify lowercase-marking of repeats. (-R)" expanded="false">\n+                <param name="uppercase" type="select" multiple="false" label="Convert the input sequences to uppercase while reading them.">\n+                    <option value="0">Convert the input sequences to uppercase while reading them. (0)</option>\n+                    <option value="1" selected="true">Keep any lowercase in the input sequences. (1)</option>\n+                </param>\n+                <param name="simple_repeat" type="select" multiple="false" label="Check for simple repeats.">\n+                    <option value="0" selected="true">Do not check for simple repeats.(0)</option>\n+                    <option value="1">Convert simple repeats to lowercase.(1)</option>\n+                    <option value="2">Convert simple DNA repeats to lowercase.(2)</option>\n+                </param>\n+                <param name="sm_lower" argument="-c" type="boolean" truevalue="-c" falsevalue="" checked="true" label="Soft-mask lowercase letters." help="This means that, when we compare these sequences to some other sequences using lastal, lowercase letters will be excluded from initial matches. 
This will apply to lowercase letters in both sets of sequences."/>\n+            </section>\n+\n+            <section name="lastdb_advanced" title="Advanced options" expanded="false">\n+                <param argument="-S" type="select" multiple="false" label="Strand">\n+                    <option value="0">Reverse</option>\n+                 '..b'     <when value="BISR">\n+                        <expand macro="step_macro" step="2"/>\n+                    </when>\n+                    <when value="MAM4">\n+                        <expand macro="step_macro" step="1"/>\n+                    </when>\n+                    <when value="MAM8">\n+                        <expand macro="step_macro" step="1"/>\n+                    </when>\n+                    <when value="MURPHY10">\n+                        <expand macro="step_macro" step="1"/>\n+                    </when>\n+                    <when value="NEAR">\n+                        <expand macro="step_macro" step="1"/>\n+                    </when>\n+                    <when value="YASS">\n+                        <expand macro="step_macro" step="1"/>\n+                    </when>\n+                </conditional>\n+\n+                <param argument="-i" type="integer" value="0" label="Minimum limit on initial matches per query position"/>\n+\n+                <param argument="-C" type="select" multiple="false" label="Child table type">\n+                    <option value="0" selected="true">None</option>\n+                    <option value="1">Byte-size</option>\n+                    <option value="2">Short-syze</option>\n+                    <option value="3">Full</option>\n+                </param>\n+            </section>\n+        </section>\n+    </inputs>\n+\n+    <outputs>\n+        <data name="outfile" format="lastdb">\n+            <discover_datasets pattern="__designation__" directory="ref_genome"/>\n+        </data>\n+    </outputs>\n+\n+    <tests>\n+        <test>\n+            <param 
name="input_files" value="humanMito.fa" ftype="fasta"/>\n+            <section name="lastdb">\n+                <section name="repeats">\n+                    <param name="uppercase" value="0"/>\n+                    <param name="simple_repeat" value="1"/>\n+                    <param name="sm_lower" value="true"/>\n+                </section>\n+            </section>\n+            <output name="outfile" compare="contains" file="humdb.lastdb" ftype="lastdb">\n+                <extra_files type="file" value="humdb.bck" name="lastdb.bck" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="humdb.des" name="lastdb.des"/>\n+                <extra_files type="file" value="humdb.prj" name="lastdb.prj"/>\n+                <extra_files type="file" value="humdb.sds" name="lastdb.sds"/>\n+                <extra_files type="file" value="humdb.ssp" name="lastdb.ssp" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="humdb.suf" name="lastdb.suf" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="humdb.tis" name="lastdb.tis" compare="sim_size" delta="1"/>\n+            </output>\n+        </test>\n+        <test>\n+            <param name="input_files" value="hedgehog_prot_human.fa" ftype="fasta"/>\n+            <section name="lastdb">\n+                <conditional name="sequences_type">\n+                    <param name="protein" value="-p"/> \n+                </conditional>\n+            </section>\n+            <output name="outfile" compare="contains" file="hedgdb.lastdb" ftype="lastdb">\n+                <extra_files type="file" value="hedgdb.bck" name="lastdb.bck" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="hedgdb.des" name="lastdb.des"/>\n+                <extra_files type="file" value="hedgdb.prj" name="lastdb.prj"/>\n+                <extra_files type="file" value="hedgdb.sds" name="lastdb.sds"/>\n+                <extra_files type="file" 
value="hedgdb.ssp" name="lastdb.ssp" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="hedgdb.suf" name="lastdb.suf" compare="sim_size" delta="1"/>\n+                <extra_files type="file" value="hedgdb.tis" name="lastdb.tis" compare="sim_size" delta="1"/>\n+            </output>\n+        </test>\n+    </tests>\n+\n+    <help>@LAST_HELP@</help>\n+    <citations><expand macro="citations"/></citations>\n+</tool>\n'
b
diff -r 000000000000 -r 9a7e91fc6562 lastsplit.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lastsplit.xml Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,81 @@
+<tool id="last_split" name="LAST-split" version="@LAST_CONDA_VERSION@+galaxy0" profile="18.01">
+
+    <!-- Galaxy wrapper around the last-split command: takes a MAF alignment
+         dataset plus a reference LAST database and writes one MAF dataset. -->
+    <description>finds "split alignments" (typically for DNA) or "spliced alignments" (typically for RNA).</description>
+
+    <macros>
+        <import>macros_last.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@LAST_CONDA_VERSION@">last</requirement>
+    </requirements>
+
+    <!--
+        Command layout:
+          * A database taken from the history has its extra-files directory
+            symlinked to ./db_files and is then addressed as db_files/lastdb.
+          * A locally installed database is addressed through the path column
+            of the selected data table entry; selected paths are emitted as
+            separately quoted shell words (same expression as the LAST-train
+            wrapper uses).
+          * The memory limit given to -b is expressed in bytes by last-split,
+            while GALAXY_MEMORY_MB is a plain number of megabytes. The M suffix
+            makes the unit explicit; the 2048 fallback equals the former 2G.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #if $db_opts.db_opts_input == 'lastdb'
+            ln -s '$db_opts.lastdatabase.extra_files_path' './db_files' &&
+        #end if
+
+        last-split
+
+        -f $output_format
+        #if $db_opts.db_opts_input == 'db'
+            -g '${"' '".join(str($db_opts.database.fields.path).split(","))}'
+        #else if $db_opts.db_opts_input == 'lastdb'
+            -g 'db_files/lastdb'
+        #end if
+
+        -d $lastsplit_opt.d
+        -c $lastsplit_opt.c
+        -t $lastsplit_opt.t
+        -M $lastsplit_opt.M
+        -S $lastsplit_opt.S
+        -m $lastsplit_opt.m
+        $lastsplit_opt.no_split
+        -b \${GALAXY_MEMORY_MB:-2048}M
+
+        '$last_align'
+
+        >'$outfile'
+    ]]></command>
+
+    <inputs>
+        <!-- Reference database selector (installed data table entry or history dataset). -->
+        <expand macro="input_db" />
+        <param name="last_align" type="data" format="maf" label="LASTal data file" />
+        <param name="output_format" argument="-f" type="select" multiple="false" label="Output format">
+            <option value="MAF">MAF</option>
+            <option value="MAF+" selected="true">MAF+</option>
+        </param>
+        <section name="lastsplit_opt" title="Last-split options" expanded="false">
+            <param argument="-d" type="select" multiple="false" label="RNA direction">
+                <option value="0">Reverse</option>
+                <option value="1" selected="true">Forward</option>
+                <option value="2">Mixed</option>
+            </param>
+            <!-- Probabilities are bounded to [0, 1] so invalid values are rejected in the form. -->
+            <param argument="-c" type="float" value="0.004" min="0" max="1" label="Cis-splice probability per base"/>
+            <param argument="-t" type="float" value="0.00001" min="0" max="1" label="Trans-splice probability per base"/>
+            <param argument="-M" type="float" value="7" label="Mean of ln[intron length]"/>
+            <param argument="-S" type="float" value="1.7" label="Standard deviation of ln[intron length]"/>
+            <param argument="-m" type="float" value="1" min="0" max="1" label="Maximum mismap probability"/>
+            <!--<param name="score" argument="-s" type="int" value="lastal e" label="Minimum alignment score"/>-->
+            <param name="no_split" argument="-n" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Write original, not split, alignment."/>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data name="outfile" format="maf" label="LAST split from  ${on_string}" />
+    </outputs>
+
+    <tests>
+        <!-- Split the genomic lastal output against the installed humdb test database. -->
+        <test>
+            <conditional name="db_opts">
+                <param name="db_opts_input" value="db"/>
+                <param name="database" value="humdb"/>
+            </conditional>
+            <param name="last_align" ftype="maf" value="last_align_gen.maf"/>
+            <output name="outfile" ftype="maf" file="last_split.maf"/>
+        </test>
+    </tests>
+
+    <help>@LAST_HELP@</help>
+    <citations><expand macro="citations"/></citations>
+</tool>
b
diff -r 000000000000 -r 9a7e91fc6562 lasttrain.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lasttrain.xml Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,121 @@
+<tool id="last_train" name="LAST-train" version="@LAST_CONDA_VERSION@+galaxy0" profile="18.01">
+
+    <!-- Galaxy wrapper around the last-train command: takes a reference LAST
+         database and a query sequence dataset; standard output is captured
+         as a plain-text dataset. -->
+    <description>finds the rates (probabilities) of insertion, deletion, and substitutions between two sets of sequences.</description>
+
+    <macros>
+        <import>macros_last.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@LAST_CONDA_VERSION@">last</requirement>
+    </requirements>
+
+    <!--
+        Command layout:
+          * A database taken from the history has its extra-files directory
+            symlinked to ./db_files and is then addressed as db_files/lastdb.
+          * A locally installed database is addressed through the path column
+            of the selected data table entry.
+          * The thread count follows GALAXY_SLOTS, falling back to 1.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #if $db_opts.db_opts_input == 'lastdb'
+            ln -s '$db_opts.lastdatabase.extra_files_path' './db_files' &&
+        #end if
+
+        last-train
+
+        -Q $init_options.Q
+
+        -r $init_options.r
+        -q $init_options.q
+
+        -a $init_options.cost_opt.a
+        -b $init_options.cost_opt.b
+        -A $init_options.cost_opt.A
+        -B $init_options.cost_opt.B
+
+        -D $last_train_opt.align_opt.D
+        -E $last_train_opt.align_opt.E
+        -s $last_train_opt.align_opt.s
+        -S $last_train_opt.align_opt.S
+        -T $last_train_opt.align_opt.T
+        -m $last_train_opt.align_opt.m
+        -k $last_train_opt.align_opt.k
+        -X $last_train_opt.align_opt.X
+
+        -P \${GALAXY_SLOTS:-1}
+
+        #if $db_opts.db_opts_input == 'db'
+            '${"' '".join(str($db_opts.database.fields.path).split(","))}'
+        #else if $db_opts.db_opts_input == 'lastdb'
+            'db_files/lastdb'
+        #end if
+
+        '$query_fasta'
+
+        >'$outfile'
+
+    ]]></command>
+
+    <inputs>
+        <!-- Reference database selector (installed data table entry or history dataset). -->
+        <expand macro="input_db" />
+        <!-- The input format selector below offers a fastq-sanger mode, so the
+             query must accept fastq-sanger datasets as well as fasta. -->
+        <param name="query_fasta" type="data" format="fasta,fastqsanger" label="Queries input files" />
+        <!-- Initial scoring parameters: the defaults of the match score,
+             mismatch cost and gap costs switch with the chosen input format. -->
+        <conditional name="init_options">
+            <param argument="-Q" type="select" multiple="false" label="Input format">
+                <option value="0" selected="true">Fasta or fastq-ignore</option>
+                <option value="1">Fastq-sanger</option>
+            </param>
+            <when value="0">
+                <param argument="-r" type="integer" value="5" label="Match score"/>
+                <param argument="-q" type="integer" value="5" label="Mismatch cost"/>
+                <expand macro="cost_macro" a="15" b="3"/>
+            </when>
+            <when value="1">
+                <param argument="-r" type="integer" value="6" label="Match score"/>
+                <param argument="-q" type="integer" value="18" label="Mismatch cost"/>
+                <expand macro="cost_macro" a="21" b="9"/>
+            </when>
+        </conditional>
+
+        <section name="last_train_opt" title="Last-train options" expanded="false">
+            <!-- Training options missing  -->
+            <section name="align_opt" title="Alignment options" expanded="true">
+                <param argument="-D" type="integer" value="1000000" label="Query letters per random alignment"/>
+                <param argument="-E" type="integer" value="10" label="Maximum expected alignments per square giga"/>
+                <param argument="-s" type="select" multiple="false" label="Query strand to use">
+                    <option value="0">Reverse</option>
+                    <option value="1" selected="true">Forward</option>
+                    <option value="2">Both</option>
+                </param>
+                <param argument="-S" type="select" multiple="false" label="Score matrix applies to forward strand of:">
+                    <option value="0">Reference</option>
+                    <option value="1" selected="true">Query</option>
+                </param>
+                <!--<param name="gapless_align" argument="-C" type="integer" value="" label="Omit gapless alignments in COUNT others with > score-per-length"/>-->
+                <param argument="-T" type="select" multiple="false" label="Type of alignment:">
+                    <option value="0" selected="true">Local</option>
+                    <option value="1">Overlap</option>
+                </param>
+                <param argument="-m" type="integer" value="10" label="Maximum initial matches per query position"/>
+                <param argument="-k" type="integer" value="1" label="Use initial matches starting at every STEP-th position in each query"/>
+                <param argument="-X" type="select" multiple="false" label="N/X is ambiguous in:">
+                    <option value="0" selected="true">Neither sequence</option>
+                    <option value="1">Reference</option>
+                    <option value="2">Query</option>
+                    <option value="3">Both</option>
+                </param>
+            </section>
+        </section>
+    </inputs>
+
+    <outputs>
+        <data name="outfile" format="txt" label="LAST train from ${on_string}" />
+    </outputs>
+
+    <tests>
+        <!-- Train on the fugu mitochondrial query against the installed humdb
+             test database; up to 22 differing lines are tolerated. -->
+        <test>
+            <conditional name="db_opts">
+                <param name="db_opts_input" value="db"/>
+                <param name="database" value="humdb"/>
+            </conditional>
+            <param name="query_fasta" value="fuguMito.fa" ftype="fasta"/>
+            <output name="outfile" file="last_train.txt" ftype="txt" lines_diff="22"/>
+        </test>
+    </tests>
+
+    <help>@LAST_HELP@</help>
+    <citations><expand macro="citations"/></citations>
+</tool>
b
diff -r 000000000000 -r 9a7e91fc6562 macros_last.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros_last.xml Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,80 @@
+<macros>
+    <!-- Shared tokens and XML fragments imported by the LAST tool wrappers. -->
+
+    <!-- Version of the "last" package requested by every wrapper. -->
+    <token name="@LAST_CONDA_VERSION@">1021</token>
+    <!-- Help text shared by all wrappers. -->
+    <token name="@LAST_HELP@"><![CDATA[
+        Documentation : http://last.cbrc.jp/
+
+        LAST finds similar regions between sequences.
+
+        The main technical innovation is that LAST finds initial matches based on their multiplicity, instead of using a fixed length (e.g. BLAST uses 11-mers). To find these variable-length matches, it uses a suffix array (inspired by Vmatch). To achieve high sensitivity, it uses a spaced suffix array (or subset suffix array), analogous to spaced seeds (or subset seeds).
+
+        LAST can:
+            - Handle big sequence data, e.g:
+                - Compare two vertebrate genomes.
+                - Align billions of DNA reads to a genome.
+            - Indicate the reliability of each aligned column.
+            - Use sequence quality data properly.
+            - Compare DNA to proteins, with frameshifts.
+            - Compare PSSMs to sequences.
+            - Calculate the likelihood of chance similarities between random sequences.
+            - Do split and spliced alignment.
+            - Train alignment parameters for unusual kinds of sequence (e.g. nanopore).
+
+    ]]></token>
+    <xml name="citations">
+        <citation type="doi">10.1101/gr.113985.110</citation>
+    </xml>
+
+    <!-- Reference database selector: either an entry of the "lastdb" data
+         table or a LAST database dataset from the history. -->
+    <xml name="input_db">
+        <conditional name="db_opts">
+            <param name="db_opts_input" type="select" label="Reference database">
+                <option value="db">Locally installed LAST database</option>
+                <option value="lastdb" selected="true">LAST database from history</option>
+            </param>
+            <when value="db">
+                <!-- NOTE(review): multiple selection is allowed here, but the
+                     wrappers appear to expect a single database name; confirm
+                     whether this should be a single-choice select. -->
+                <param name="database" type="select" multiple="true" label="LAST database">
+                    <options from_data_table="lastdb" />
+                </param>
+            </when>
+            <when value="lastdb">
+                <!-- Restricted to the lastdb datatype (the format produced by the
+                     LASTdb wrapper): the commands read the dataset's extra-files
+                     directory, which other dataset types do not provide. -->
+                <param name="lastdatabase" type="data" format="lastdb" label="LAST database" />
+            </when>
+        </conditional>
+    </xml>
+
+    <!-- Seed placement parameters; the "step" token sets the default of -w. -->
+    <xml name="step_macro" token_step="1">
+        <param argument="-w" type="integer" value="@STEP@" label="Use initial matches starting at every w-th position in each sequence"/>
+        <param argument="-W" type="integer" value="1" label="Use minimum positions in sliding windows of W consecutive positions"/>
+        <!-- -m ignored if -u is defined scheme -->
+        <param argument="-m" type="integer" value="1" label="Seed pattern"/>
+    </xml>
+
+    <!-- Score threshold and score-drop parameters, grouped in one collapsed section. -->
+    <xml name="score_macro">
+        <section name="score_opt" title="Score options" expanded="false">
+            <param argument="-X" type="select" multiple="false" label="N/X is ambiguous">
+                <option value="0" selected="true">Neither sequence</option>
+                <option value="1">Reference</option>
+                <option value="2">Query</option>
+                <option value="3">Both</option>
+            </param>
+            <!--
+            <param name="residue_pair_cost" argument="-c" type="integer" value="0" label="Unaligned residue pair cost"/>
+            <param name="frameshift_cost" argument="-F" type="integer" value="0" label="Frameshift cost"/>
+            -->
+            <param argument="-x" type="integer" value="21" label="Maximum score drop for preliminary gapped alignments"/>
+            <param argument="-y" type="integer" value="9" label="Maximum score drop for gapless alignments"/>
+            <param argument="-z" type="integer" value="21" label="Maximum score drop for final gapped alignments"/>
+            <param argument="-d" type="integer" value="13" label="Minimum score for gapless alignments"/>
+            <param argument="-e" type="integer" value="22" label="Minimum score for gapped alignments"/>
+        </section>
+    </xml>
+
+    <!-- Gap cost parameters; the "a" and "b" tokens set the defaults of the
+         existence costs (-a, -A) and extension costs (-b, -B) respectively. -->
+    <xml name="cost_macro" token_a="7" token_b="1">
+        <section name="cost_opt" title="Cost options" expanded="false">
+            <param argument="-a" type="integer" value="@A@" label="Gap existence cost"/>
+            <param argument="-b" type="integer" value="@B@" label="Gap extension cost"/>
+            <param argument="-A" type="integer" value="@A@" label="Insertion existence cost"/>
+            <param argument="-B" type="integer" value="@B@" label="Insertion extension cost"/>
+        </section>
+    </xml>
+
+</macros>
b
diff -r 000000000000 -r 9a7e91fc6562 maf-convert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/maf-convert.xml Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,143 @@
+<tool id="last_maf_convert" name="MAF-convert" version="@LAST_CONDA_VERSION@+galaxy0" profile="18.01">
+
+    <description>read MAF-format alignments and write them in another format.</description>
+
+    <macros>
+        <import>macros_last.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="@LAST_CONDA_VERSION@">last</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## Command shape: maf-convert FORMAT [options] INPUT > OUTPUT
+        maf-convert
+
+        $maf_convert.output_type.output_format
+
+        ## Expands to "-n" or to nothing (see the boolean truevalue/falsevalue).
+        $maf_convert.noheader
+
+        ## Options that are only offered for psl output.
+        #if $maf_convert.output_type.output_format == 'psl'
+            $maf_convert.output_type.protein
+            -j $maf_convert.output_type.j
+        #end if
+
+        ## Options that are only offered for sam output.
+        #if $maf_convert.output_type.output_format == 'sam'
+            $maf_convert.output_type.sam_file.dictionary
+            #if $maf_convert.output_type.sam_file.dictionary == '-d'
+                -f '$maf_convert.output_type.sam_file.f'
+            #end if
+            ## The read group is free text with space-separated fields, so it is
+            ## quoted to reach maf-convert as one argument; skipped when blank.
+            #if str($maf_convert.output_type.r).strip() != ''
+                -r '$maf_convert.output_type.r'
+            #end if
+        #end if
+
+        ## Line length is only offered for the blast and html formats.
+        #if $maf_convert.output_type.output_format == 'blast' or $maf_convert.output_type.output_format == 'html'
+            -l $maf_convert.output_type.l
+        #end if
+
+        '$maf_file'
+
+        > '$outfile'
+
+    ]]></command>
+    <inputs>
+        <param name="maf_file" type="data" format="maf" label="MAF file to convert" />
+
+        <section name="maf_convert" title="MAF-convert arguments" expanded="true">
+
+            <param name="noheader" argument="-n" type="boolean" truevalue="-n" falsevalue="" checked="false" label="No header." help="Omit any header lines from the output"/>
+
+            <!-- One <when> per output format; formats without extra options get an explicit empty case. -->
+            <conditional name="output_type">
+                <param name="output_format" type="select" multiple="false" label="Output format">
+                    <option value="axt">axt</option>
+                    <option value="blast">blast</option>
+                    <option value="blasttab">blasttab</option>
+                    <option value="chain">chain</option>
+                    <option value="html">html</option>
+                    <option value="psl" selected="true">psl</option>
+                    <option value="sam">sam</option>
+                    <option value="tab">tab</option>
+                </param>
+                <when value="axt"/>
+                <when value="blast">
+                    <param argument="-l" type="integer" value="60" label="Line length"/>
+                </when>
+                <when value="blasttab"/>
+                <when value="chain"/>
+                <when value="html">
+                    <param argument="-l" type="integer" value="60" label="Line length"/>
+                </when>
+                <when value="psl">
+                    <param name="protein" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="false" label="Protein alignments." />
+                    <param argument="-j" type="integer" value="1" label="Join neighboring alignments" help="Join neighboring alignments if they are co-linear and separated by at most N letters."/>
+                </when>
+                <when value="sam">
+                    <!-- The dictionary file is only requested once the dictionary switch is on. -->
+                    <conditional name="sam_file">
+                        <param name="dictionary" argument="-d" type="boolean" truevalue="-d" falsevalue="" checked="false" label="Include dictionary of sequence lengths in sam format"/>
+                        <when value="-d">
+                            <param argument="-f" type="data" format="dict" label="Get a sequence dictionary from DICTFILE" />
+                        </when>
+                        <when value=""/>
+                    </conditional>
+                    <param argument="-r" type="text" value="" label="Specify read group information" help="Example: 'ID:1 PL:ILLUMINA SM:mysample'"/>
+                </when>
+                <when value="tab"/>
+            </conditional>
+        </section>
+    </inputs>
+
+    <outputs>
+        <!-- Every selectable format has a change_format entry, so the declared datatype always follows the chosen format. -->
+        <data format="maf" name="outfile" label="${on_string} to ${maf_convert.output_type.output_format}">
+            <change_format>
+                <when input="maf_convert.output_type.output_format" value="axt" format="axt" />
+                <!-- blast output is BLAST-like text and blasttab is tab-separated; neither is BLAST XML. -->
+                <when input="maf_convert.output_type.output_format" value="blast" format="txt" />
+                <when input="maf_convert.output_type.output_format" value="blasttab" format="tabular" />
+                <when input="maf_convert.output_type.output_format" value="chain" format="txt" />
+                <when input="maf_convert.output_type.output_format" value="html" format="html" />
+                <when input="maf_convert.output_type.output_format" value="psl" format="tabular" />
+                <when input="maf_convert.output_type.output_format" value="sam" format="sam" />
+                <when input="maf_convert.output_type.output_format" value="tab" format="tabular" />
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- axt conversion -->
+        <test>
+            <param name="maf_file" value="last_align_train_gen.maf" ftype="maf"/>
+            <section name="maf_convert">
+                <conditional name="output_type">
+                    <param name="output_format" value="axt"/>
+                </conditional>
+            </section>
+            <output name="outfile" file="outfile_convert.axt"/>
+        </test>
+        <!-- blast conversion (default line length) -->
+        <test>
+            <param name="maf_file" value="last_align_train_gen.maf" ftype="maf"/>
+            <section name="maf_convert">
+                <conditional name="output_type">
+                    <param name="output_format" value="blast"/>
+                </conditional>
+            </section>
+            <output name="outfile" file="outfile_convert.blast"/>
+        </test>
+        <!-- html conversion (default line length) -->
+        <test>
+            <param name="maf_file" value="last_align_train_gen.maf" ftype="maf"/>
+            <section name="maf_convert">
+                <conditional name="output_type">
+                    <param name="output_format" value="html"/>
+                </conditional>
+            </section>
+            <output name="outfile" file="outfile_convert.html"/>
+        </test>
+        <!-- tab conversion -->
+        <test>
+            <param name="maf_file" value="last_align_train_gen.maf" ftype="maf"/>
+            <section name="maf_convert">
+                <conditional name="output_type">
+                    <param name="output_format" value="tab"/>
+                </conditional>
+            </section>
+            <output name="outfile" file="outfile_convert.tab"/>
+        </test>
+        <!-- sam conversion with a read group containing spaces: exercises the quoted read-group argument -->
+        <test>
+            <param name="maf_file" value="last_align_train_gen.maf" ftype="maf"/>
+            <section name="maf_convert">
+                <conditional name="output_type">
+                    <param name="output_format" value="sam"/>
+                    <param name="r" value="ID:1 PL:ILLUMINA SM:mysample"/>
+                </conditional>
+            </section>
+            <output name="outfile">
+                <assert_contents>
+                    <has_text text="ID:1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        This script reads alignments in maf format, and writes them in another format. It can write them in these formats: axt, blast, blasttab, chain, html, psl, sam, tab.
+    ]]></help>
+    <citations><expand macro="citations"/></citations>
+</tool>
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/fuguMito.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fuguMito.fa Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,330 @@\n+>fuguMito\n+GCTAGCGTAGCTTAACCAAAGCAGAGTACTGAAGATGCTAAGATGGGCCC\n+TGAAAAGTCCCGCAGGCACAAAAGCTTGGTCCTGACTTTACTAACAACTC\n+TGATCAAACTTACACATGCAAGTATCCGCATCCCAGTGAAaatgcccccc\n+gccccccgtcCGGAAATAGGGAGTTGGTATCAGGCACACAAATTTGTAGC\n+CCATGACACCTAGCTTTGCCACGCCCCCAAGGGAATTCAGCAGTGATAAA\n+CATTAAGCCATAAGTGAAAACTTGACTTAGTTATGATCTAAAGAGTCGGT\n+AAAACTCGTGCCAGCCACCGCGGTTATACGAGAGACCCAAGTTGTTAGCC\n+AACGGCGTAAAGGGTGGTTAgaactaaaaacaacaaactgagACCGAACA\n+CCTTCAAGGCTGTTATACGCTTCCGAAGCAACGAAGAACAATAACGAAAG\n+TAGCCTCACTAACTCGAACCCACGAAAGCTAGGACACAAACTGGGATTAG\n+ATACCCCACTATGCCTACCCCTAAACACGATATGAAACTACGTACATATC\n+CGCCTGGTTACTACGAGCATTAGCTTAAAACCCAAAGGACTTGGCGGTGC\n+TTTAAAACCATCTAGAGGAGCCTGTTTTAAAACCGATACTCCCCGTTCAA\n+CCTCACCCCTCCTTGTTTTAACCGCCTATATACCACCGTCGTCAGCCTAC\n+CCTGTGAAGGGCAAATAGTAGACAAAATTGGCACAGCCAAAAACGTCAGG\n+TCGAGGTGTAGCGAATGGAGGGGGACAAAATGGGCTACATTCTCTGCCTA\n+GAGAACACGAAAGATGTGCTGAAATGCACACCCGAAGGAGGATTTAGCAG\n+TAAGCAAGAAATAGAGTGTCATGCTGAAACCGGCTATGAAGCGCGCACAC\n+ACCGCCCGTCACTCTCCCCAAActcttaatttaaaaataactaATAAGCC\n+AccaaaagaaaaggggaggcAAGTCGTAACATGGTAAGTGTACCGGAAGG\n+TGCACTTGGAAAAACCGGAGCATAGCTTAACAGCTTAAAGCACCTCCCTT\n+ACACCGAGTTGACGCCCGTGCAAATCGAGCTGCCCCGACACCTAACAGCt\n+agcccccaccccacccacaaCAAACCACTATAAATACCCCCTAAGATACT\n+TaactaaacaaaacaaatcatttttccACCCTAGTATAGGAGATAGAAAA\n+GGAACTAGGAGCTATAGATAAAGTACCGCAAGGGAACGctgaaagagaaa\n+tgaaataacccagtaaagtaaaaaaaagcagagattaCACCTCGTACCTT\n+TTGCATCATGATTTAGCTAGTATAATTAGGCAAAGAGCACTTTAGTCTAA\n+CACCCCGAAACTGAATGAGCTACTCCAAGACAGCCTTTATAGGGCACATC\n+CGTCTCTGTGGCAAAAGAGTGGAAAGAGCTTTGAGTAGAGGTGATAAACC\n+TACCGAGTTCAGTTATAGCTGGTTGCCCGAGAACTGAGTATAAGCTCAGC\n+CTTTTGGCTTCTTAACTCCATAACTATTTATATTAACCCGACTTTAAGAA\n+ACCAAAAGAGTTAATCAAAGGGGGTACAGCCCCTTTGATACAAGAAACAA\n+CTTTTAACAGGAGGATAAGGATCATAAAAAATCAAGGCACCGCGCTTAAG\n+TAGGCTTAGAAGCAGCCACCACAAGAAAGCGTTAAAGCTCTAGCACATCC\n+CTGCCACAAATACCAATAAAACACTCCTAACCCCTTCCCCTACCGGGCTT\n+TTCTATGCTTCCATAGAAGAAATTATGCTAAAATGAGTAATAAGGGGCCG\n+ACCCCCTCCAAGCACAAGTGTACATCAGAACGAACCCCCACCGAAATTCA\n+ACGGA
CCCAACCAAAGAgggaaataaatattaaactcacaacaagaaaaa\n+catttaacacttttCCGTTACCCCTACACTGGTGTGCCAAATAGGAAaga\n+ctaaaagaaaaagaaggaactCGGCAAACTCAAAGCCTCGCCTGTTTACC\n+AAAAACATCGCCTCTTGCTTCAATGAATAAGAGGTCACGCCTGCCCTGTG\n+ACTATATGTTTAACGGCCGCGGTATTTTGACCGTGCAAAGGTAGCGCAAT\n+CACTTGTCCTTTAAATGTGGACCTGTATGAATGGCATAACGAGGGCTTAG\n+CTGTCTCCTTTCTCAAGTCAATGAACTTGATCTCCCCGTGCAGAAGCGGG\n+GATAAAACCATAAGACGAGAAGACCCTATGGAGCTTTAGACAAAAAACAG\n+CCCCTGTCAATAAACCCTAAATAAAGGGAATAAACCTAGTGAACCTGTTT\n+TAATGTCTTTGGTTGGGGCGACCGCGGGGTAACAAAAAACCCCCATGTGG\n+AATGAAAACACCCTTTTTAAACCCAAGAGTCACCACTCTAGGATACAGAA\n+CATCTGACCAATAATGATCGCCTAAAGCCGATTAACGAACCGAGTTACCC\n+TAGGGATAACAGCGCAATCCTCTTTTAGAGTCCATATCGACAAGAGGGTT\n+TACGACCTCGATGTTGGATCAGGACATCCTAATGGTGCAGCCGCTATTAA\n+AGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCG\n+GAGTAATCCAGGTCAGTTTCTATCTATGAAGTACCTTTTTCCAGTACGAA\n+AGGACCGAAAAAGAGGGGCCAATGTACAAACAAGCCCCACTCTCACTTGC\n+TGAATCCAGCTCAAGCAAATAAGAGAGTACAAAACtaagtcaaagaacat\n+gacatGTTAGTGTGGCAGAGCCCGGTATTGCAAAAGCCTTAAACCCTTCG\n+AACAGAGGTTCAACTCCTCTCCCTAACTATGACTACAATACTAATTACCC\n+ACTTAATTCACCCCCTAACCATTATTGTACCCGTTCTACTAGCCGTAGCT\n+TTCTTAACATTAATTGAACGAAAAGTTTTAGGTTACATGCAGTTACGAAA\n+GGGCCCCAACATCGTAGGACCTTACGGACTCCTCCAACCCATCGCCGATG\n+GCGTTAAACTTTTCATCAAAGAACCAGTCCGACCATCTACCTCCGCTCCC\n+ATCCTATTCATTATTGCCCCTACACTAGCCCTTACTCTCGCCATAATAAT\n+ATGAACACCAATGCCCCTCCCCTACCCCATCCTTGACTTAAATCTGGCCA\n+TTCTATTTGTCCTAGCTATCTCTAGCTTGGCAGTCTACTCTATTCTGGGC\n+TCCGGATGGGCCTCCAACTCAAAATATGCCCTTATAGGATCCCTACGAGC\n+AGTTGCACAAATAATCTCATACGAAGTAAGTCTAGGGCTAATCCTTTTAT\n+CattgattatttttacaggcAACTTTACCCTACAAACATTTAACGTCACC\n+CAGGAAAGCATTTGACTAATCATCCCAACATGACCCCTTGCAGCAATATG\n+ATACATCTCCACGCTAGCCGAAACAAACCGAGCCCCCTTTGACCTAACAG\n+AGGGGGAGTCCGAACTGGTATCTGGGTTCAATGTTGAATATGCAGGAGGT\n+CCCTTTGCCCTATTTTTTCTGGCAGAATATGCCAACATCCTCTTAATAAA\n+CACGCTCTCCACAATCCTGTTCCTAGGAGCCTTACATATGCCCGCTCTTC\n+CAGAACTAACCTCTATCAATTTAATatcaaaaacagccattttatcCCTC\n+ATCTTCCTATGAGCCCGAGCCTCCTACCCACGATTCCGATATGACCAGCT\n+AATACACCTCACATGAAAAAACTTCCTACCACTTACATTAGCAT
TCATTA\n+TCTGACATCTTGCACTCCCAACTACAATAGCAGGCCTTCCCCCTCAAATA\n+TAAAAGGAACTGTGCCT'..b'ATCAGCCCAATTCGGCCTTCATCCGTGACTTCCTTCTGCAATAGAAGGT\n+CCTACACCGGTCTCTGCCCTACTCCACTCCAGCACTATAGTTGTTGCAGG\n+AATCTTCTTAATAATCCGCATCTCCCCCCTCTTAGAAACCAACCCAACAG\n+CCCTCACACTCTGCCTATGCCTAGGAGCCCTAACCACCCTATTTACCGCC\n+ACCTGCGCCCTAACCCAGAACGATATCaaaaaaaTTGTAGCTTTTTCAAC\n+TTCCAGTCAACTAGGCCTAATGATAGTCACCATTGGCCTAAATCAGCCCC\n+AACTTGCCTTCCTGCACATCTGCACCCACGCTTTTTTCAAAGCCATATTA\n+TTCTTATGCTCTGGGTCTATTATTCACAGCTTAAATGATGAACAAGACAT\n+CCGCAAAATGGGAGGGATACACCACTTGACCCCTGTTACCTCTTCATGCC\n+TAACAATTGGCAGCTTAGCCCTGACCGGAACCCCCTTCCTAGCCGGCTTC\n+TTTTCCAAAGATGCCATCATCGAATCTTTAACCACCTCCCAATTAAACGC\n+CTGAGCCCTATGCCTCACCCTCCTAGCAACTTCTTTCACAGCTATCTACA\n+GCCTACGAGTCGTATTCTACGTATCCATAGGCCACCCTCGCTTTAATTCC\n+CTTTCACCAATCAATGAAAATAACCCATCTGTAATTAACCCTATCAAACG\n+ACTGGCATGAGGCAGCATTATTGCTGGCCTATTAATCACCACAAACCTTC\n+TCCCAACAAAAACACCTGTAATATCAATACCTATAGTTGTTAAACTTACT\n+GCTCTTATCGTCACAATCTTGGGACTCCTAATTGCCCTAGAATTAGCTTC\n+CTTAACCTCCAAACAACTTAAACCTACACCACACCTGTCCCCCCACCACT\n+TCTCAAACATACTTGGCTTCTTCCCAACAATTGTACACCGTGCCTCTCCT\n+AAAATTAATCTCATTTTAGGACAAACAATTGCTACCCAAATTATCGACCT\n+AACCTGACTAGAAAAAGTTGGACCCAAAACAATTTCATCTATCAACACTC\n+CCCTCATCTCTACCATCAGTAACATCCAACAAGGATCAATCAAGACATAc\n+cttgtcctcttcctcacgACCCTTGCTCTATCAACCCTCGTTCTTCTTAC\n+CTAACTGCTCGAAGAGCCCCCCGACCCAGCCCCCGCACCAGCTCTAATAC\n+TACAAGCAACGTCAATAACAAGACCCAGGCCCCCAATAGTAATACTCCCC\n+CACCGCTAGAATATATAAGTGAAACCCCGTCCATATCACCTCGAAAAACT\n+GCATCTCAGTCTATCTCTCCAGAAACCCCTCATCACACCTCTTCATCAAG\n+GACCATATTTGTGTACAAGataccaaaaacaaaaaaaaaatatccaaaaa\n+agaataaaaccacCTGTCAACCTGTAGGCGCCTTAGGATCCTTATCTGCA\n+GACAGCGCTGacgaataaataaacacaaccaGTATACCCCCCATATAAAT\n+CATTAACAACACCAAAGACAAGAAAGTTCCCCCGTGCAAAACTGAAGCCC\n+CACAGCAAAGAAGTGCAACCAGCACCAAATTGAAAACTCCATAAAAAGGA\n+GCAGGATTTGTAGACAACACAATTATTACAACCAACATCcctaataaaag\n+aaaaacaagcgcATAAAACATAGTTTCTGCCAGGATTTTAACCAGGACCT\n+ATGGCGTGAAAAACCATCGTTGTTACTCAACTACAAAAACACTAATGGCC\n+AGCCTACGCAAAACCCACCCCCTACTAAAAATCGTAAACGACATAGTAAT\n+TGACCTTCC
TACCCCCTCAAACATTTCCGCCTGATGAAACTTTGGCTCTC\n+TACTCGGATTATGCCTTATTACACAAATCATCACAGGACTGTTCCTTGCA\n+ATACACTACACATCCGACATCTCTACCGCCTTTTCATCCGTAGCCCACAT\n+TTGCCGAGACGTAAACTACGGCTGACTAATTCGCAATCTACACGCAAACG\n+GTGcctcattcttttttatttgcttaTACTCCCACATCGGCCGAGGTCTT\n+TACTATGGCTCTTACCTAAGTAAAGAAACCTGAAACGTAGGGGTAGTCCT\n+CTTACTTTTAGTAATGGCCACCGCTTTCGTAGGCTACGTTCTTCCATGAG\n+GACAAATATCCTTCTGAGGCGCCACTGTAATTACAAACCTGCTCTCTGCT\n+GTCCCCTACGTAGGAAACACGCTCGTTCAATGAGTATGAGGAGGCTTTTC\n+AGTAGACAGCGCCACTCTAACACGATTCTTtgccttccacttcctcctcc\n+catTTATCGTTGCAGCCGCTGCCATCGtacatcttatttttcttcacgAA\n+ACAGGCTCCAACAATCCCCTAGGACTCAATTCAAACGCAGACAAAATCCC\n+ATTCCACCCATACTTCTCTTACAAAGACCTCCTGGGCTTCACAATCATAC\n+TCTCAGCCCTCGCAACACTCGCCCTATTCTCTCCAAACTACCTCGGAGAC\n+CCTGACAACTTCACACCAGCCAATCCTCTAGTTACCCCCGCCCACATTAA\n+ACCAGAATGGTATTTCCTATTTGCATACGCAATTCTACGATCTATCCCCA\n+ATAAGCTAGGAGGTGTTCTGGCCCTTCTTGCCTCAATCTTAATTCTTATA\n+GTAGTTCCTTTCTTACACACCTCTAAACAACGAAGCCTAACATTCCGCCC\n+ACTATCACAATTCCTATTCTGAACCCTAATTGCCGACGTCGTCATCCTAA\n+CCTGAATTGGAGGCATGCCCGTCGAACATCCTTACATTATTATCGGACAA\n+ATTGCCTCAGTACTTTACTTCTCTCTCTTCCTAATCTTGATGCCAATAGC\n+CGGTTGACTAGAAAATAAAATACTAAACTAACAAGCATTAGTAGCTCAGA\n+TTCAGAGCGTCGGTCTTGTAAACCGAATGTCGGGGGTTAAAATCCCCCCT\n+TATGCTCAAAAAGAAGGGACTTCAACCCCCACCACTGGCTCCCAAAGCCA\n+GCATTCTTAATTAAACTACTTTTTGataatacatatatgtattatcccca\n+ttcatatatattaaacattaatataatgcataattaaGACATAGTACTAT\n+ATATTCACCTATAGTTCCTATAACCCATAAAGCAAGTACAGGAagctaaa\n+aatgctaaaagcataactggaaaaatccctaaaaattgttcaaaaactga\n+acgaaatTTAAGACCGAACAATAAACTCATCAGTTAAGATATACCAGGAC\n+TCAACACCCCGTAAAATACCAATTATTAATGTAGTAAGAACCGACCATCA\n+GTTGATTTCTTAATGCATATTATTATTGAAGGTGAGGGACAATAACAGTG\n+GGGGTTTCACTAAATGAACTATTCCTGGCATTTGGTTCCTACTTCAGGGC\n+CATTAATCGATTTATTCCTCATTCTTTCATCGACGCTGACATAAGTTGTT\n+GGTGGAGTTCATCAGTGAGATAATCCCACATGCCGGGCGTTCTCTCCACA\n+GGGGTCAGgttattttttctctctttcctttcaaTTGACATTTCAGAGTG\n+CAGCGCGTCAATGGTTCATCAAGGTTGAACATTTTTTCTTGGTTTATGGT\n+AATGTTAATTAATGAATTAAGACATTATTTAAGAATTACATTACTGATAT\n+CAAGGACATAAATAATAATACGATTCAACAATCATACAATTTCAcccc
ct\n+tcttctttttaaaaaaattaacgtataccccccctaccccccctaAAAAA\n+TAGGAGAGACCTTTAAGTTTGAACCAAGCTCTCCActtaattaaatattc\n+atcatattattatcatatattataatattataataatataattatat\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.bck
b
Binary file test-data/hedgdb.bck has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.des
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hedgdb.des Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,1 @@
+NP_002172.2
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.prj
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hedgdb.prj Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,26 @@
+version=1021
+alphabet=ACDEFGHIKLMNPQRSTVWY
+numofsequences=1
+numofletters=411
+maxsequenceletters=411
+letterfreqs=45 6 18 23 16 34 18 7 13 47 5 8 28 11 34 27 18 34 8 11
+maxunsortedinterval=0
+keeplowercase=1
+masklowercase=1
+numofindexes=1
+integersize=32
+totallength=413
+specialcharacters=310
+prefixlength=2
+subsetseed=A C G T
+subsetseed=AG CT
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=ACGT
+subsetseed=A C G T
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=ACGT
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=A C G T
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.sds
b
Binary file test-data/hedgdb.sds has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.ssp
b
Binary file test-data/hedgdb.ssp has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.suf
b
Binary file test-data/hedgdb.suf has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgdb.tis
b
Binary file test-data/hedgdb.tis has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgehog_prot_drosophila.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hedgehog_prot_drosophila.fa Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,9 @@
+>NP_001034065.1 hedgehog [Drosophila melanogaster]
+MDNHSSVPWASAASVTCLSLDAKCHSSSSSSSSKSAASSISAIPQEETQTMRHIAHTQRCLSRLTSLVAL
+LLIVLPMVFSPAHSCGPGRGLGRHRARNLYPLVLKQTIPNLSEYTNSASGPLEGVIRRDSPKFKDLVPNY
+NRDILFRDEEGTGADRLMSKRCKEKLNVLAYSVMNEWPGIRLLVTESWDEDYHHGQESLHYEGRAVTIAT
+SDRDQSKYGMLARLAVEAGFDWVSYVSRRHIYCSVKSDSSISSHVHGCFTPESTALLESGVRKPLGELSI
+GDRVLSMTANGQAVYSEVILFMDRNLEQMQNFVQLHTDGGAVLTVTPAHLVSVWQPESQKLTFVFADRIE
+EKNQVLVRDVETGELRPQRVVKVGSVRSKGVVAPLTREGTIVVNSVAASCYAVINSQSLAHWGLAPMRLL
+STLEAWLPAKEQLHSSPKVVSSAQQQNGIHWYANALYKVKDYVLPQSWRHD
+
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/hedgehog_prot_human.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hedgehog_prot_human.fa Wed Jun 17 14:50:21 2020 -0400
[
@@ -0,0 +1,8 @@
+>NP_002172.2 indian hedgehog protein preproprotein [Homo sapiens]
+MSPARLRPRLHFCLVLLLLLVVPAAWGCGPGRVVGSRRRPPRKLVPLAYKQFSPNVPEKTLGASGRYEGK
+IARSSERFKELTPNYNPDIIFKDEENTGADRLMTQRCKDRLNSLAISVMNQWPGVKLRVTEGWDEDGHHS
+EESLHYEGRAVDITTSDRDRNKYGLLARLAVEAGFDWVYYESKAHVHCSVKSEHSAAAKTGGCFPAGAQV
+RLESGARVALSAVRPGDRVLAMGEDGSPTFSDVLIFLDREPHRLRAFQVIETQDPPRRLALTPAHLLFTA
+DNHTEPAARFRATFASHVQPGQYVLVAGVPGLQPARVAAVSTHVALGAYAPLTKHGTLVVEDVVASCFAA
+VADHHLAQLAFWPLRLFHSLAWGSWTPGEGVHWYPQLLYRLGRLLLEEGSFHPLGMSGAGS
+
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humanMito.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humanMito.fa Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,333 @@\n+>humanMito\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT\n+TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG\n+GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT\n+CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA\n+AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT\n+GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA\n+AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC\n+CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT\n+TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA\n+TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC\n+GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC\n+CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA\n+AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA\n+ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC\n+AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG\n+GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC\n+ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT\n+TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC\n+GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT\n+TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA\n+CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC\n+ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC\n+CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA\n+GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA\n+GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC\n+TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT\n+AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG\n+TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT\n+ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT\n+GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC\n+TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA\n+GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG\n+AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT\n+CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC\n+CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG\n+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA\n+TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT\n+CTGC
ATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA\n+GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC\n+GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT\n+ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT\n+TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG\n+TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA\n+GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA\n+AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT\n+GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA\n+TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA\n+GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA\n+ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG\n+GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT\n+TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC\n+CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca\n+taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt\n+tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg\n+cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA\n+ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA\n+TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA\n+GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT\n+CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC\n+CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT\n+CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA\n+TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT\n+CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC\n+TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT\n+TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg\n+taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct\n+taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA\n+TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA\n+CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC\n+CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA\n+CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC\n+GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT\n+CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT\n+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC\n+GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCC
TAGCCAT\n+CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC\n+TTATCACAACACAAGA'..b'TTAGTTACCGCTAACAACCTATT\n+CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA\n+TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA\n+GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG\n+ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA\n+ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA\n+GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA\n+AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG\n+CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA\n+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC\n+AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT\n+CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA\n+CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT\n+ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG\n+ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC\n+TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG\n+TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA\n+ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC\n+TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC\n+CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA\n+AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC\n+ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT\n+CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA\n+ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT\n+TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC\n+CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC\n+TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC\n+TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT\n+CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC\n+CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT\n+TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG\n+CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT\n+ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA\n+CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA\n+CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA\n+ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA\n+AATTAAAAAAACTATTAAACCCATATAACCTCCCC
CAAAATTCAGAATAA\n+TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA\n+GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT\n+CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA\n+CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG\n+ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT\n+CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT\n+CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA\n+GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA\n+CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA\n+ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC\n+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT\n+CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT\n+GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC\n+GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA\n+CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT\n+TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC\n+GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT\n+CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT\n+TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC\n+GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT\n+CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC\n+CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA\n+GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG\n+CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC\n+TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA\n+CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC\n+TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA\n+TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG\n+GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA\n+GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG\n+TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC\n+ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA\n+CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT\n+TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC\n+TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT\n+AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC\n+AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG\n+ACCACCATCCTCCGTGAAATC
AATATCCCGCACAAGAGTGCTACTCTCCT\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC\n+ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC\n+CTTAAATAAGACATCACGATG\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.bck
b
Binary file test-data/humdb.bck has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.des
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humdb.des Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,1 @@
+humanMito
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.prj
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/humdb.prj Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,27 @@
+version=1021
+alphabet=ACGT
+numofsequences=1
+numofletters=16571
+maxsequenceletters=16571
+letterfreqs=5113 5192 2180 4086
+maxunsortedinterval=0
+keeplowercase=0
+tantansetting=1
+masklowercase=1
+numofindexes=1
+integersize=32
+totallength=16573
+specialcharacters=451
+prefixlength=10
+subsetseed=A C G T
+subsetseed=AG CT
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=ACGT
+subsetseed=A C G T
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=ACGT
+subsetseed=A C G T
+subsetseed=ACGT
+subsetseed=A C G T
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.sds
b
Binary file test-data/humdb.sds has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.ssp
b
Binary file test-data/humdb.ssp has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.suf
b
Binary file test-data/humdb.suf has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/humdb.tis
b
Binary file test-data/humdb.tis has changed
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/last_align_gen.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/last_align_gen.maf Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,1201 @@\n+# LAST version 1021\n+#\n+# a=7 b=1 A=7 B=1 e=22 d=13 x=21 y=9 z=21 D=1e+06\n+# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 L=999999999 n=10 k=1 w=1000 t=2.82533 j=3 Q=0\n+# ref_db\n+# Reference sequences=1 normal letters=16571\n+#\n+#    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  J  Z  X  *\n+# A  5 -2 -2 -2 -1 -1 -1  0 -2 -2 -2 -1 -1 -3 -1  1  0 -3 -2  0 -2 -2 -1 -1 -6\n+# R -2  6 -1 -2 -4  1 -1 -3  0 -3 -3  2 -2 -4 -2 -1 -1 -4 -3 -3 -1 -3  0 -1 -6\n+# N -2 -1  6  1 -3  0 -1 -1  0 -4 -4  0 -3 -4 -3  0  0 -4 -3 -4  5 -4  0 -1 -6\n+# D -2 -2  1  6 -4 -1  1 -2 -2 -4 -5 -1 -4 -4 -2 -1 -1 -6 -4 -4  5 -5  1 -1 -6\n+# C -1 -4 -3 -4  9 -4 -5 -4 -4 -2 -2 -4 -2 -3 -4 -2 -1 -3 -3 -1 -4 -2 -4 -1 -6\n+# Q -1  1  0 -1 -4  6  2 -2  1 -3 -3  1  0 -4 -2  0 -1 -3 -2 -3  0 -3  4 -1 -6\n+# E -1 -1 -1  1 -5  2  6 -3  0 -4 -4  1 -2 -4 -2  0 -1 -4 -3 -3  1 -4  5 -1 -6\n+# G  0 -3 -1 -2 -4 -2 -3  6 -3 -5 -4 -2 -4 -4 -3 -1 -2 -4 -4 -4 -1 -5 -3 -1 -6\n+# H -2  0  0 -2 -4  1  0 -3  8 -4 -3 -1 -2 -2 -3 -1 -2 -3  2 -4 -1 -4  0 -1 -6\n+# I -2 -3 -4 -4 -2 -3 -4 -5 -4  5  1 -3  1 -1 -4 -3 -1 -3 -2  3 -4  3 -4 -1 -6\n+# L -2 -3 -4 -5 -2 -3 -4 -4 -3  1  4 -3  2  0 -3 -3 -2 -2 -2  1 -4  3 -3 -1 -6\n+# K -1  2  0 -1 -4  1  1 -2 -1 -3 -3  5 -2 -4 -1 -1 -1 -4 -3 -3 -1 -3  1 -1 -6\n+# M -1 -2 -3 -4 -2  0 -2 -4 -2  1  2 -2  6  0 -3 -2 -1 -2 -2  1 -3  2 -1 -1 -6\n+# F -3 -4 -4 -4 -3 -4 -4 -4 -2 -1  0 -4  0  6 -4 -3 -2  0  3 -1 -4  0 -4 -1 -6\n+# P -1 -2 -3 -2 -4 -2 -2 -3 -3 -4 -3 -1 -3 -4  8 -1 -2 -5 -4 -3 -2 -4 -2 -1 -6\n+# S  1 -1  0 -1 -2  0  0 -1 -1 -3 -3 -1 -2 -3 -1  5  1 -4 -2 -2  0 -3  0 -1 -6\n+# T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -2  1  5 -4 -2  0 -1 -1 -1 -1 -6\n+# W -3 -4 -4 -6 -3 -3 -4 -4 -3 -3 -2 -4 -2  0 -5 -4 -4 11  2 -3 -5 -3 -3 -1 -6\n+# Y -2 -3 -3 -4 -3 -2 -3 -4  2 -2 -2 -3 -2  3 -4 -2 -2  2  7 -2 -3 -2 -3 -1 -6\n+# V  0 -3 -4 -4 -1 -3 -3 -4 -4  3  1 -3  1 -1 -3 -2  0 -3 -2  4 -4  2 -3 -1 -6\n+# B -2 -1  5  5 -4  0  1 -1 -1 -4 -4 -1 -3 -4 -2  0 
-1 -5 -3 -4  5 -4  0 -1 -6\n+# J -2 -3 -4 -5 -2 -3 -4 -5 -4  3  3 -3  2  0 -4 -3 -1 -3 -2  2 -4  3 -3 -1 -6\n+# Z -1  0  0  1 -4  4  5 -3  0 -4 -3  1 -1 -4 -2  0 -1 -3 -3 -3  0 -3  5 -1 -6\n+# X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6\n+# * -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6  1\n+#\n+# Coordinates are 0-based.  For - strand matches, coordinates\n+# in the reverse complement of the 2nd sequence are used.\n+#\n+# name start alnSize strand seqSize alignment\n+#\n+# batch 0\n+a score=62488\n+s humanMito 578 15992 + 16571 GTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTCACATCA-CCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATC---AAAAGGGA-CAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCAGTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATA--CTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAAC---TCACCT---GAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCT-GAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTC-TTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTA-CCCA-CGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAA--CTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAaagtgcacTTGGACGAACCAGAGTGTAGCTTAACA-C--AAAGCACCCAACTTACACTTAGGAGAT-----TTCAACTTAA-CTTGACCG-CTC-TGA--GCTAAACCTAGCCCCA-----AAC---CCACT-------CCACCTTA-CTACCAGAC-AACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAA---AATT---ATAACC-----AAGCATAATATAGCAAGGACTAACCCCT-ATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAG-CTAAGACCCCCGAAACCAG
ACGAGCTAC-CTAAGAACAGC--TAAAAGAGCACACCCGTCTA'..b'TTTATCTTTAGG----C-GGTATGCACTTTTAACAGT-CACCCCCCAA--CTAACACA----TTATTTT\n+s fuguMito  11998 148 - 16447 GAATTTG-TAATAGAAGGGAGAATGGTGCGATTTTTTGTCATG------TGGCCAGGA----TAAGTCCAGTTGTAAGGTCTAG--TCCTTGTATTA-CTTCTGGGAGTCAGGTGTGGAGTGGGGCTAGTCCAATTTTTAAGGCTAATGCAATGGTAATTAT\n+\n+a score=222\n+s humanMito 188 106 + 16571 ACATACCTACTAAAGTGTGTTAATtaattaaTGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCGCTT--TCCACACAGACA--TCATAACAaaaaattt\n+s fuguMito    0 103 - 16447 atata---attatattattataatattataatatatg-ataataatatgatgaatatttaat-taagTGGAG-AGCTTGGTTCAAACTTAAAGGTC-TCTCCTATTTTTt\n+\n+a score=208\n+s humanMito 1936 122 + 16571 AAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAA--TCTTAGTTCAACTTTAAATTTG\n+s fuguMito     0 105 - 16447 atataattatattattataa-tattataata-tatgat-aataatatgatgaa--tatttaatta---ag--TGGAG--AGCTTG--GTTCAAACTTAAAGGTC-----TCTCCTATTTTTtag\n+\n+a score=206\n+s humanMito 5338 109 + 16571 TCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCT\n+s fuguMito     1 102 - 16447 tataattata--ttattataatattataatatatgataataatatgatgaatatttaattaagTGGAGA--GCTTG---GTTCAAACTTAAAGGTCTCTCCTATTTTTt\n+\n+a score=188\n+s humanMito 3463 103 + 16571 ATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACC-ATCG--CTCTTCTACTATGAA\n+s fuguMito     0 104 - 16447 atataattatattattataatattataatatatgataataatatgatgaatatttaattaagTGGA--GAGCTTGGTTCAAACTTAAAGGTCTCTCCTATTTTTta\n+\n+a score=187\n+s humanMito 4362 121 + 16571 ATCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATACCCT-----TCCCGTACTAATTAATCCCCTGG\n+s fuguMito     0 106 - 16447 at--ataattatat--------tattataat--attataatatatgataa---taatatgatgaatatttaattaagTGGAGAGCTTGGTTCAAACTTAAAGGTCTC-TCCTATTTTTt----agg\n+\n+a score=187\n+s humanMito 16074 109 + 16571 
ATCAACA-ACCGCTATG-TATT-TCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGT----ACATAAAAACCCAACCCACATCAaa\n+s fuguMito      2 102 - 16447 ataattatattattataatattataatatatgataataa--tatgatgaatatt-----------taattaagTGGAGAGCT-TGGTTCAAACTTAAAGGTCTCTCCTATTTTTta\n+\n+a score=184\n+s humanMito 11733 134 + 16571 TACTATTCTGCCTAGCAAACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGGACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCCTCGCTAACCTCGCCTTA\n+s fuguMito      1 103 - 16447 tataattat----attattataatatta---------taatatatg-ataataat---------atga--tgaatatttaattaagTGGAGAGCTTGGTTCAAACTTAAAG---GTCTCTCCTATTT---TTta\n+\n+a score=183\n+s humanMito 13635 111 + 16571 TCAACCTCGCTTCCCCACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGC-AGCCGGAAGCCTATTCGCAGGATTTCTC--ATTACTAA\n+s fuguMito      1 103 - 16447 tataattatatta-------tta-taatattataatatatgataataatatgatgaata--tttaattaagTGGAGAGCTTGGTTCAAACTTAAAGG-TCTCTCCTATTTTTta\n+\n+a score=173\n+s humanMito 16462 105 + 16571 ATAACACT-TGGGGGTAGCTAA-AGTGAACTGTATCCGA----CATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTC-CCCTTAAATAAGACATCAC\n+s fuguMito      0  94 - 16447 atataattatattatta--taatattataatatat--gataataatatgatgaatattt------aattaag--TGGAGAGCTTG---GTTCAAACTTAA--AGGTC-TCTC\n+\n+a score=170\n+s humanMito 9402 110 + 16571 ACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATACGGGATAATCCTATTTATTACCTCAGA-AGTTTTTTTCT---TCGCAGGATTTTTCTGAGCCTTTTA\n+s fuguMito     0 104 - 16447 atataattatattattata----atattataatatatgataataatatg--atgaa--tatttaattaagTGGAGAGCTTGGTTCAAACTTAAAGGTCTCTCCTAT--TTTTta\n+\n+a score=168\n+s humanMito 14018 110 + 16571 TATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAA-------CTTTACTTCCTCTCTTTct--tct-tcccactcatccta\n+s fuguMito      1 103 - 16447 tataat-tatattattataatattataatat----------atgataa--taatatgatgaatatttaattaagTGGAGAGCTTGG-TTC-AAACTTAAAGGTCTCTCCTATTTTT--ta\n+\n+a score=144\n+s humanMito 416 94 + 16571 
ATGCACTTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCAT-ACTACTAATCTCA---------TCAATACAACCCCC---GCCCATCCTA\n+s fuguMito    0 97 - 16447 atataattatatta-tta------taatattataatatatgataat---aatatgatgaatatttaattaagTGGAGAGCTTGGTTCAAACTTAAAGGTCTCTCCTA\n+\n+# Query sequences=1\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/last_align_prot.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/last_align_prot.maf Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,66 @@
+# LAST version 1021
+#
+# a=11 b=2 A=11 B=2 e=22 d=13 x=21 y=9 z=21 D=1e+06 E=2.73494e+09
+# R=10 u=0 s=1 S=0 M=0 T=0 m=10 l=1 L=999999999 n=10 k=1 w=1000 t=3.08611 j=3 Q=0
+# ref_db
+# Reference sequences=1 normal letters=411
+# lambda=0.305935 K=0.102404
+#
+#    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  J  Z  X  *
+# A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1 -1 -1 -4
+# R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1 -2  0 -1 -4
+# N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  4 -3  0 -1 -4
+# D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3  4 -3  1 -1 -4
+# C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -1 -3 -1 -4
+# Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2  0 -2  4 -1 -4
+# E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2  1 -3  4 -1 -4
+# G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -1 -4 -2 -1 -4
+# H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3  0 -3  0 -1 -4
+# I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -3  3 -3 -1 -4
+# L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4  3 -3 -1 -4
+# K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2  0 -3  1 -1 -4
+# M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -3  2 -1 -1 -4
+# F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -3  0 -3 -1 -4
+# P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -2 -3 -1 -1 -4
+# S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2  0 -2  0 -1 -4
+# T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -1 -1 -1 -1 -4
+# W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4 -2 -2 -1 -4
+# Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -3 -1 -2 -1 -4
+# V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -3  2 -2 -1 -4
+# B -2 -1  4  4 -3  0  1 -1  0 -3 -4  0 -3 -3 -2  0 -1 -4 -3 -3  4 -3  0 -1 -4
+# J -1 -2 -3 -3 -1 -2 -3 -4 -3  3  3 -3  2  0 -3 -2 -1 -2 -1  2 -3  3 -3 -1 -4
+# Z -1  0  0  1 -3  4  4 -2  0 -3 -3  1 -1 -3 -1  0 -1 -2 -2 -2  0 -3  4 -1 -4
+# X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -4
+# * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1
+#
+# Coordinates are 0-based.  For - strand matches, coordinates
+# in the reverse complement of the 2nd sequence are used.
+#
+# name start alnSize strand seqSize alignment
+#
+# batch 0
+a score=729 EG2=1.4e-80 E=1.2e-101
+s NP_002172.2    13 264 + 411 LVLLLLLVVPA----AWGCGPGRVVGSRRRPPRKLVPLAYKQFSPNVPEKTLGASGRYEGKIARSSERFKELTPNYNPDIIFKDEENTGADRLMTQRCKDRLNSLAISVMNQWPGVKLRVTEGWDEDGHHSEESLHYEGRAVDITTSDRDRNKYGLLARLAVEAGFDWVYYESKAHVHCSVKSEHSAAAKTGGCFPAGAQVRLESGARVALSAVRPGDRVLAMGEDGSPTFSDVLIFLDREPHRLRAFQVIETQDPPRRLALTPAHLL
+s NP_001034065.1 66 265 + 471 LVALLLIVLPMVFSPAHSCGPGRGLGRHRA--RNLYPLVLKQTIPNLSEYTNSASGPLEGVIRRDSPKFKDLVPNYNRDILFRDEEGTGADRLMSKRCKEKLNVLAYSVMNEWPGIRLLVTESWDEDYHHGQESLHYEGRAVTIATSDRDQSKYGMLARLAVEAGFDWVSYVSRRHIYCSVKSDSSISSHVHGCFTPESTALLESGVRKPLGELSIGDRVLSMTANGQAVYSEVILFMDRNLEQMQNFVQLHT-DGGAVLTVTPAHLV
+
+a score=131 EG2=0.4 E=4.1e-14
+s NP_002172.2    293 95 + 411 FASHVQPGQYVLVAGVPG--LQPARVAAVSTHVALGAYAPLTKHGTLVVEDVVASCFAAVADHHLAQLAFWPLRLFHSLAWGSWTPG-EGVHWYPQLL
+s NP_001034065.1 344 96 + 471 FADRIEEKNQVLVRDVETGELRPQRVVKVGSVRSKGVVAPLTREGTIVVNSVAASCYAVINSQSLAHWGLAPMRLLSTLE--AWLPAKEQLHSSPKVV
+
+a score=41 EG2=3.7e+11 E=0.06
+s NP_002172.2    91 34 + 411 KDEENTGADRLMTQRCKDRLNSLAISVMNQWPGV
+s NP_001034065.1 44 34 + 471 QEETQTMRHIAHTQRCLSRLTSLVALLLIVLPMV
+
+a score=30 EG2=1.1e+13 E=1.8
+s NP_002172.2    180 18 + 411 ESKAHVHCSVKSEHSAAA
+s NP_001034065.1  20 18 + 471 DAKCHSSSSSSSSKSAAS
+
+a score=24 EG2=6.6e+13 E=12
+s NP_002172.2     26 23 + 411 GCGPGRVVGSRRRPPRKLVPLAY
+s NP_001034065.1 150 21 + 471 GTGADRLMSKRCK--EKLNVLAY
+
+a score=24 EG2=6.6e+13 E=12
+s NP_002172.2    106 27 + 411 CKDRLNSLAISVMNQWPGVKLRVTEGW
+s NP_001034065.1 399 27 + 471 CYAVINSQSLAHWGLAPMRLLSTLEAW
+
+# Query sequences=1
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/last_align_train_gen.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/last_align_train_gen.maf Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,47 @@\n+# LAST version 1021\n+#\n+# a=29 b=2 A=28 B=1 e=106 d=63 x=105 y=48 z=105 D=1e+06 E=2.9693e+07\n+# R=10 u=2 s=2 S=0 M=0 T=0 m=10 l=1 L=999999999 n=10 k=1 w=1000 t=4.80115 j=3 Q=0\n+# ref_db\n+# Reference sequences=1 normal letters=16571\n+# lambda=0.213867 K=0.208\n+#\n+#     A   C   G   T   M   S   K   W   R   Y   B   D   H   V\n+# A   4  -3  -2  -5   2  -3  -4   1   2  -4  -3   1   0   1\n+# C  -3   4  -7  -2   2   2  -3  -2  -4   2   1  -3   1   1\n+# G  -3  -8   7  -9  -5   3   3  -5   3  -9   1   1  -6   1\n+# T  -5  -2  -7   4  -3  -3   2   1  -6   2   1   0   1  -4\n+# M   2   2  -4  -3   2   0  -3   0   0   0  -1  -1   0   1\n+# S  -3   2   3  -4   0   2   0  -3   0   0   1  -1  -1   1\n+# K  -4  -3   2   2  -4   0   2   0  -1   0   1   1  -1  -1\n+# W   1  -2  -4   1   0  -3   0   1   0   0  -1   1   0  -1\n+# R   2  -4   3  -6   0   0  -1   0   3  -5  -2   1  -1   1\n+# Y  -4   2  -7   2   0   0   0   0  -5   2   1  -1   1  -1\n+# B  -4   1   1   1  -1   1   1  -1  -2   1   1   0   0  -1\n+# D   1  -3   1   0  -1  -1   1   1   1  -1   0   1   0   0\n+# H   0   1  -5   1   0  -1  -1   0  -1   1   0   0   0   0\n+# V   1   1   1  -4   1   1  -1  -1   1  -1  -1   0   0   1\n+#\n+# Coordinates are 0-based.  
For - strand matches, coordinates\n+# in the reverse complement of the 2nd sequence are used.\n+#\n+# name start alnSize strand seqSize alignment\n+#\n+# batch 0\n+a score=22420 EG2=0 E=0\n+s humanMito 584 13529 + 16571 GTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC-ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCAC-CACGATCAAAAGGGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCAGTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGT-AAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATAT----AGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAaagtgcacTTGGACGAACCAGAGTGTAGCTTAACA---CAAAGCACCCAACTTACACTTAGGAGAT----------TTCAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCACCT-------TACTACCAGACAACCTTAGCCAAACCAT--------TTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAG-----------ATGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGC--TAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGA-TAGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACT----------------GTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACAC--------------CCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACC--CACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAAT
CTATCACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCC--GCATAAGCCTGCGTCAGATCAAAACACTGAACTGACAATTAACAGCCCAATA-------------------TCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTA--CCCCGCCTGTTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGAC-ACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATGACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAA---CAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAacctccgagcagtaCATGCTAAGACTTCACCAGTCAAAGCG-------------------AACTACTATACTCAATTGATCCAATAAC-TTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATC'..b'CAAACATACTTGGCTTCTTCCCAACAATTGTACACCGTGCCTCTCCTAAAATTAATCTCATTTTAGGACAAACAATTGCTACCCAAATTATCGACCTAACCTGACTAGAAAAAGTTGGACCCAAAACAATTTCATCTATCAACACTCCCCTCATCTCTACCATCAGTAACATCCAACAAGGATCAATCAAGACATAccttgtcctcttcctc\n+\n+a score=2873 EG2=3e-250 E=1.3e-259\n+s humanMito 14432 1601 + 16571 
GCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATtaaaaaaactattaaacCCATATAACCTCCCCC-----AAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATcttacccttcaTTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTcttccttctctccttaatGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTaatcctaatacCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATA-CACCAGTCTTGTAAAC-------CGGAGACGAAAACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCAT\n+s fuguMito  14029 1605 + 16447 
GCCTTAGGATCCTTATCTGCAGACAGCGCTGacgaataaataaacacaaccaGTATACCCCCCATATAAATCATTAACAACACCAAAGACAAGAAAGTTCCCCCGTGCAAAACTGAAGCCCCACAGCAAAGAAGTGCAACCAGCACCAAATTG-----AAAACTCCATAAAAAGGAGCAGGATTTGTAGACAACACAATTATTACAACCAACATCcctaataaaagaaaaacaagcgcATAAAACATAGTTTCTGCCAGGATTTTAACCAGGACCTATGGCGTGAAAAACCATCGTTGTTACTCAACTACAAAAACACTAATGGCCAGCCTACGCAAAACCCACCCCCTACTAAAAATCGTAAACGACATAGTAATTGACCTTCCTACCCCCTCAAACATTTCCGCCTGATGAAACTTTGGCTCTCTACTCGGATTATGCCTTATTACACAAATCATCACAGGACTGTTCCTTGCAATACACTACACATCCGACATCTCTACCGCCTTTTCATCCGTAGCCCACATTTGCCGAGACGTAAACTACGGCTGACTAATTCGCAATCTACACGCAAACGGTGcctcattcttttttatttgcttaTACTCCCACATCGGCCGAGGTCTTTACTATGGCTCTTACCTAAGTAAAGAAACCTGAAACGTAGGGGTAGTCCTCTTACTTTTAGTAATGGCCACCGCTTTCGTAGGCTACGTTCTTCCATGAGGACAAATATCCTTCTGAGGCGCCACTGTAATTACAAACCTGCTCTCTGCTGTCCCCTACGTAGGAAACACGCTCGTTCAATGAGTATGAGGAGGCTTTTCAGTAGACAGCGCCACTCTAACACGATTCTTtgccttccacttcctcctcccatTTATCGTTGCAGCCGCTGCCATCGtacatcttatttttcttcacgAAACAGGCTCCAACAATCCCCTAGGACTCAATTCAAACGCAGACAAAATCCCATTCCACCCATACTTCTCTTACAAAGACCTCCTGGGCTTCACAATCATACTCTCAGCCCTCGCAACACTCGCCCTATTCTCTCCAAACTACCTCGGAGACCCTGACAACTTCACACCAGCCAATCCTCTAGTTACCCCCGCCCACATTAAACCAGAATGGTATTTCCTATTTGCATACGCAATTCTACGATCTATCCCCAATAAGCTAGGAGGTGTTCTGGCCCTTCTTGCCTCAATCTTAATTCTTATAGTAGTTCCTTTCTTACACACCTCTAAACAACGAAGCCTAACATTCCGCCCACTATCACAATTCCTATTCTGAACCCTAATTGCCGACGTCGTCATCCTAACCTGAATTGGAGGCATGCCCGTCGAACATCCTTACATTATTATCGGACAAATTGCCTCAGTACTTTACTTCTCTCTCTTCCTAATCTTGATGCCAATAGCCGGTTGACTAGAAAATAAAATACTAAACT-AACAAGCATTAGTAGCTCAGATTCAGAGCGTCGGTCTTGTAAACCGAATGTCGGGGGTTAAAATCCCCCCTTATGCTCAA---AAAGAAGGGACTTCAACCCCCACCACTGGCTCCCAAAGCCAGCATTCTTAATTAAACTACTTTTTGataatacat\n+\n+a score=178 EG2=6.1 E=3.3e-09\n+s humanMito 16456 71 + 16571 GGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAA\n+s fuguMito  15935 71 + 16447 GGGACAATAACAGTGGGGGTTTCACTAAATGAACTATTCCTGGCATTTGGTTCCTACTTCAGGGCCATTAA\n+\n+a score=118 EG2=2.3e+06 E=0.0012\n+s humanMito 5826 49 + 16571 TGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTT\n+s fuguMito  4680 49 - 
16447 TGGACAAGAGGGTTTTAACCTCTGTTTTTAGAATCACAATCTAATGTTT\n+\n+# Query sequences=1\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/last_split.maf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/last_split.maf Wed Jun 17 14:50:21 2020 -0400
[
b'@@ -0,0 +1,96 @@\n+# LAST version 1021\n+#\n+# a=7 b=1 A=7 B=1 e=22 d=13 x=21 y=9 z=21 D=1e+06\n+# R=10 u=0 s=2 S=0 M=0 T=0 m=10 l=1 L=999999999 n=10 k=1 w=1000 t=2.82533 j=3 Q=0\n+# ref_db\n+# Reference sequences=1 normal letters=16571\n+#\n+#    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  J  Z  X  *\n+# A  5 -2 -2 -2 -1 -1 -1  0 -2 -2 -2 -1 -1 -3 -1  1  0 -3 -2  0 -2 -2 -1 -1 -6\n+# R -2  6 -1 -2 -4  1 -1 -3  0 -3 -3  2 -2 -4 -2 -1 -1 -4 -3 -3 -1 -3  0 -1 -6\n+# N -2 -1  6  1 -3  0 -1 -1  0 -4 -4  0 -3 -4 -3  0  0 -4 -3 -4  5 -4  0 -1 -6\n+# D -2 -2  1  6 -4 -1  1 -2 -2 -4 -5 -1 -4 -4 -2 -1 -1 -6 -4 -4  5 -5  1 -1 -6\n+# C -1 -4 -3 -4  9 -4 -5 -4 -4 -2 -2 -4 -2 -3 -4 -2 -1 -3 -3 -1 -4 -2 -4 -1 -6\n+# Q -1  1  0 -1 -4  6  2 -2  1 -3 -3  1  0 -4 -2  0 -1 -3 -2 -3  0 -3  4 -1 -6\n+# E -1 -1 -1  1 -5  2  6 -3  0 -4 -4  1 -2 -4 -2  0 -1 -4 -3 -3  1 -4  5 -1 -6\n+# G  0 -3 -1 -2 -4 -2 -3  6 -3 -5 -4 -2 -4 -4 -3 -1 -2 -4 -4 -4 -1 -5 -3 -1 -6\n+# H -2  0  0 -2 -4  1  0 -3  8 -4 -3 -1 -2 -2 -3 -1 -2 -3  2 -4 -1 -4  0 -1 -6\n+# I -2 -3 -4 -4 -2 -3 -4 -5 -4  5  1 -3  1 -1 -4 -3 -1 -3 -2  3 -4  3 -4 -1 -6\n+# L -2 -3 -4 -5 -2 -3 -4 -4 -3  1  4 -3  2  0 -3 -3 -2 -2 -2  1 -4  3 -3 -1 -6\n+# K -1  2  0 -1 -4  1  1 -2 -1 -3 -3  5 -2 -4 -1 -1 -1 -4 -3 -3 -1 -3  1 -1 -6\n+# M -1 -2 -3 -4 -2  0 -2 -4 -2  1  2 -2  6  0 -3 -2 -1 -2 -2  1 -3  2 -1 -1 -6\n+# F -3 -4 -4 -4 -3 -4 -4 -4 -2 -1  0 -4  0  6 -4 -3 -2  0  3 -1 -4  0 -4 -1 -6\n+# P -1 -2 -3 -2 -4 -2 -2 -3 -3 -4 -3 -1 -3 -4  8 -1 -2 -5 -4 -3 -2 -4 -2 -1 -6\n+# S  1 -1  0 -1 -2  0  0 -1 -1 -3 -3 -1 -2 -3 -1  5  1 -4 -2 -2  0 -3  0 -1 -6\n+# T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -2 -1 -1 -2 -2  1  5 -4 -2  0 -1 -1 -1 -1 -6\n+# W -3 -4 -4 -6 -3 -3 -4 -4 -3 -3 -2 -4 -2  0 -5 -4 -4 11  2 -3 -5 -3 -3 -1 -6\n+# Y -2 -3 -3 -4 -3 -2 -3 -4  2 -2 -2 -3 -2  3 -4 -2 -2  2  7 -2 -3 -2 -3 -1 -6\n+# V  0 -3 -4 -4 -1 -3 -3 -4 -4  3  1 -3  1 -1 -3 -2  0 -3 -2  4 -4  2 -3 -1 -6\n+# B -2 -1  5  5 -4  0  1 -1 -1 -4 -4 -1 -3 -4 -2  0 
-1 -5 -3 -4  5 -4  0 -1 -6\n+# J -2 -3 -4 -5 -2 -3 -4 -5 -4  3  3 -3  2  0 -4 -3 -1 -3 -2  2 -4  3 -3 -1 -6\n+# Z -1  0  0  1 -4  4  5 -3  0 -4 -3  1 -1 -4 -2  0 -1 -3 -3 -3  0 -3  5 -1 -6\n+# X -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -6\n+# * -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 -6  1\n+#\n+# Coordinates are 0-based.  For - strand matches, coordinates\n+# in the reverse complement of the 2nd sequence are used.\n+#\n+# name start alnSize strand seqSize alignment\n+#\n+# m=1 s=35 d=1 c=0.004 t=1e-05 M=7 S=1.7\n+# trans=-62\n+# cismax=97557\n+# GT=8 GC=-3 AT=-8 NN=-10\n+# AG=8 AC=-8 NN=-10\n+#\n+# Query sequences=1\n+a score=4261 mismap=1e-10 don=AT\n+s humanMito 578 1068 + 16571 GTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTCACATCA-CCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATC---AAAAGGGA-CAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCAGTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATA--CTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAAC---TCACCT---GAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCT-GAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTC-TTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTA-CCCA-CGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAA--CTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAaagtgcacTTGGACGAACCAGAGTGTAGCTTAACA-C--AAAGCACCCAACTTACACTTAGGAG\n+s fuguMito    0 1061 + 16447 
GCTAGCGTAGCTTAAC----CAAAGCAGAGTACTGAAGATGCTAAGATGGGCCCTGAAAAGTCCCGCAGGCACAAAAGCTTGGTCCTGACTTTACTAACAACTCTGATCAAACTTACACATGCAAGTATCCGCATCCCAGTGAAaatgccccccg---ccccccgtcCGGAAA'..b'~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~w~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~w~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~s~~~~w~~~~~~~~~~~~~~~~~~~~~~~~~~~~~w~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~wwwwwwwwwwwwwmmmwwwwwwwwwwwwwwwOOOO~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|||{{wSSRRRRRRRRRIIED@@)&&&&%%%%%$$$$$$$$$$$$!!!!!!!!!!!!!!!\n+\n+a score=486 mismap=0.798 acc=GA don=GT\n+s humanMito  2111 190 + 16571 ACAGCTCT-TTGGACACTAGGAAAAAACCTTGT--AGAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCAC----CAATTA-AGAAAGCGTTCAA-GCTCAACACCCACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGAC-CAATCTATCACC---CTATAGAAGAACTAAT\n+s fuguMito  15651 179 + 16447 tcatatatattaaacattaatataatgcataattaaGACATAGTACTATATATT-CACCTATAGTT--CCTATAA-C--CCATAAAGCAAGTACAGGAagctaaaaatgctaaaagc--ataactggaaaaatccctaa----aaattgt-----tcaaa---aactgaacgaaatTTAAGACCGAACAATA----AACTCAT\n+p                             !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n+\n+a score=117 mismap=0.839 acc=AG don=CC\n+s humanMito  2455 39 + 16571 GTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCG\n+s fuguMito  15832 29 + 16447 GTTAAGATA--TACCAGG-ACTC----AAC---ACCCCG\n+p                            !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n+\n+a score=751 mismap=1.48e-06 acc=AG don=GT\n+s humanMito  6049 315 + 16571 
GTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGG-CAACTGAC--TAG----TTCCCCTAAtaaTCGGTGCCCCCGATATGGCGTT---TCCCCGCATAAACAACATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCT-ATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTAGCAGGGAAC-TACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTAC-----ACCTAGCAG\n+s fuguMito  15861 285 + 16447 TAAAATACCA-ATTATTAATGTAGTAAGAACCGACCAT-CAGTTG-----AT-TTCT------TAATGCATATTATTATTGAAGGTGAGGGACAA-TAACAGTGGGGGTTTCACTAAATGAACTATTCC-------TGGCATTTGGTTCCTACTTCAGGGCCATTA--ATC-GATTTATTCCTCATTCTTTCAT-CGAC-GCTGACATAAGTTGTTGGTGGAGTTC--------ATCAG--TG---AGATAATCC-CACAT-GCCGGG--CGTTCTCTC-CACAGGGGTCA-Ggttattttttctctctt-tcctttcaaTTGACATTTCAG\n+p                             """"""""""""""""""&&&&&&&&&&&&&\'\'555666666999999999999999999999<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<=ZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[ZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[QQQQQQQQQQQQQQQQOOQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQQPPLLLKKKKKKKKKKKKKKKKKKKKJIF<977777744444444\n+\n+a score=564 mismap=0.000243 acc=AG don=GC\n+s humanMito    94 223 + 16571 ACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCT---TTGATTCCTGCCTCATTCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTAAAGTGTGTTAATta-attaaTGCT-TGTAGGACATAATAATAACAATTGAATGTCTGCA--CAGCCGCTTTCCACACAGACATCATAACAaaaaatttccac-caaacccccccctcccccc\n+s fuguMito  16146 196 + 16447 A-----GTGC---AGCGC-----GTCAATGGTTCA-TCAAGGTTGAA-CAT---TTTTTCT-TGGTTTATGGTA---ATGTTAATTAATGAATTAAGACAT---TATTTAAG------AATTACATTACTGATATCAAGGACATAA--ATAATAATACGAT-TCAACAATCATACAATTTCAcccccttcttctttttaaaaaaatt-aacgtataccccccctacccccc\n+p                             333333336?????@@@@@@@@@@@@@@@@@@@@@@@@@@@@AAAAAABBBBBBCCDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEED@@:\n+\n+a score=255 mismap=0.516 acc=AG\n+s humanMito  2030 114 + 16571 
ATAGAATCTTAGTTCAA-CTTTAAATTTG-CCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGA-AAAAACCTTGTA\n+s fuguMito  16342 104 + 16447 ctaAAAAATAGGAGAGACCTTTAAGTTTGAACCA-AG----CTCT----CCActt----aattaaatattcatcatattattatcatatattataatattataataatataattata\n+p                             #####################################################################################################################\n+\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/last_train.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/last_train.txt Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,504 @@\n+# lastal version: 1021\n+# maximum percent identity: 100\n+# scale of score parameters: 4.5512\n+# scale used while training: 91.024\n+\n+# lastal -j7 -D1000000 -E10 -s1 -S1 -T0 -m10 -k1 -P1 -X0 -Q0 -r5 -q5 -a15 -b3 -A15 -B3 ref_db /tmp/tmpikDSIL/tmp/tmpDOHHwB\n+\n+# aligned letter pairs: 14901\n+# deletes: 694\n+# inserts: 834\n+# delOpens: 350\n+# insOpens: 402\n+# alignments: 1\n+# mean delete size: 1.98286\n+# mean insert size: 2.07463\n+# matchProb: 0.951836\n+# delOpenProb: 0.0223571\n+# insOpenProb: 0.0256787\n+# delExtendProb: 0.495677\n+# insExtendProb: 0.517986\n+\n+# substitution percent identity: 71.9408\n+\n+# count matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 3291           532            387            390           \n+# C 552            3221           180            685           \n+# G 235            102            1561           84.7          \n+# T 335            554            146            2651          \n+\n+# probability matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 0.220773       0.0356887      0.0259615      0.0261627     \n+# C 0.0370303      0.216077       0.0120751      0.0459525     \n+# G 0.0157647      0.00684256     0.104718       0.00568201    \n+# T 0.0224731      0.0371645      0.00979425     0.177839      \n+\n+# delExistCost: 346\n+# insExistCost: 342\n+# delExtendCost: 62\n+# insExtendCost: 58\n+\n+# score matrix (query letters = columns, reference letters = rows):\n+#        A      C      G      T\n+# A     76    -90    -59   -105\n+# C    -87     73   -129    -54\n+# G    -88   -164    145   -167\n+# T   -112    -66   -127     90\n+\n+# lastal -j7 -D1000000 -E10 -s1 -S1 -T0 -m10 -k1 -P1 -X0 -Q0 -t91.4322 -p- ref_db /tmp/tmpikDSIL/tmp/tmpDOHHwB\n+\n+# aligned letter pairs: 14771\n+# deletes: 379.3\n+# inserts: 567.1\n+# delOpens: 
185.2\n+# insOpens: 246.1\n+# alignments: 2\n+# mean delete size: 2.04806\n+# mean insert size: 2.30435\n+# matchProb: 0.971438\n+# delOpenProb: 0.01218\n+# insOpenProb: 0.0161851\n+# delExtendProb: 0.511732\n+# insExtendProb: 0.566038\n+\n+# substitution percent identity: 70.0105\n+\n+# count matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 3158           583.4          429.9          391.6         \n+# C 589            3065           156.8          774           \n+# G 252.2          88.64          1571           68.46         \n+# T 342.6          630.1          131.09         2566          \n+\n+# probability matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 0.21341        0.0394248      0.0290516      0.0264634     \n+# C 0.0398032      0.207126       0.0105962      0.0523051     \n+# G 0.0170431      0.00599008     0.106165       0.00462637    \n+# T 0.0231521      0.0425807      0.00885876     0.173404      \n+\n+# delExistCost: 406\n+# insExistCost: 400\n+# delExtendCost: 60\n+# insExtendCost: 51\n+\n+# score matrix (query letters = columns, reference letters = rows):\n+#        A      C      G      T\n+# A     75    -79    -48   -102\n+# C    -78     72   -140    -41\n+# G    -79   -174    146   -185\n+# T   -107    -52   -136     89\n+\n+# lastal -j7 -D1000000 -E10 -s1 -S1 -T0 -m10 -k1 -P1 -X0 -Q0 -t90.8564 -p- ref_db /tmp/tmpikDSIL/tmp/tmpDOHHwB\n+\n+# aligned letter pairs: 14881\n+# deletes: 258.8\n+# inserts: 446.1\n+# delOpens: 112.91\n+# insOpens: 168\n+# alignments: 2\n+# mean delete size: 2.29209\n+# mean insert size: 2.65536\n+# matchProb: 0.981278\n+# delOpenProb: 0.00744548\n+# insOpenProb: 0.0110782\n+# delExtendProb: 0.563717\n+# insExtendProb: 0.623403\n+\n+# substitution percent identity: 68.9366\n+\n+# count matrix (query letters = columns, reference letters = rows):\n+#   A     
         C              G              T             \n+# A 3126           618.3          445            406.6         \n'..b'      0.0113452      0.0548495     \n+# G 0.0182832      0.00638041     0.103502       0.00476511    \n+# T 0.0248873      0.0453139      0.00912288     0.169076      \n+\n+# delExistCost: 584\n+# insExistCost: 542\n+# delExtendCost: 32\n+# insExtendCost: 29\n+\n+# score matrix (query letters = columns, reference letters = rows):\n+#        A      C      G      T\n+# A     74    -69    -42    -94\n+# C    -68     70   -132    -35\n+# G    -70   -166    146   -180\n+# T    -99    -45   -132     88\n+\n+# lastal -j7 -D1000000 -E10 -s1 -S1 -T0 -m10 -k1 -P1 -X0 -Q0 -t90.815 -p- ref_db /tmp/tmpikDSIL/tmp/tmpDOHHwB\n+\n+# aligned letter pairs: 14981\n+# deletes: 197\n+# inserts: 384.7\n+# delOpens: 57.97\n+# insOpens: 104.51\n+# alignments: 2\n+# mean delete size: 3.39831\n+# mean insert size: 3.68099\n+# matchProb: 0.989075\n+# delOpenProb: 0.00382729\n+# insOpenProb: 0.00689995\n+# delExtendProb: 0.705736\n+# insExtendProb: 0.728334\n+\n+# substitution percent identity: 67.93\n+\n+# count matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 3093           648.8          451.2          427.6         \n+# C 656            2998           169.9          821.4         \n+# G 273.8          95.75          1550           71.36         \n+# T 372.7          678.6          135.61         2532          \n+\n+# probability matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 0.206534       0.0433235      0.0301288      0.0285529     \n+# C 0.0438042      0.200191       0.011345       0.0548488     \n+# G 0.0182829      0.00639368     0.103501       0.00476505    \n+# T 0.024887       0.0453133      0.00905532     0.169074      \n+\n+# delExistCost: 586\n+# insExistCost: 543\n+# delExtendCost: 32\n+# 
insExtendCost: 29\n+\n+# score matrix (query letters = columns, reference letters = rows):\n+#        A      C      G      T\n+# A     74    -69    -42    -94\n+# C    -68     70   -132    -35\n+# G    -70   -166    147   -180\n+# T    -99    -45   -132     88\n+\n+# lastal -j7 -D1000000 -E10 -s1 -S1 -T0 -m10 -k1 -P1 -X0 -Q0 -t91.0834 -p- ref_db /tmp/tmpikDSIL/tmp/tmpDOHHwB\n+\n+# aligned letter pairs: 14981\n+# deletes: 196\n+# inserts: 384.7\n+# delOpens: 57.87\n+# insOpens: 104.51\n+# alignments: 2\n+# mean delete size: 3.3869\n+# mean insert size: 3.68099\n+# matchProb: 0.989081\n+# delOpenProb: 0.00382071\n+# insOpenProb: 0.0069\n+# delExtendProb: 0.704745\n+# insExtendProb: 0.728334\n+\n+# substitution percent identity: 67.93\n+\n+# count matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 3093           648.8          451.2          427.6         \n+# C 656            2998           169.9          821.4         \n+# G 273.8          95.75          1550           71.36         \n+# T 372.7          678.6          135.61         2532          \n+\n+# probability matrix (query letters = columns, reference letters = rows):\n+#   A              C              G              T             \n+# A 0.206534       0.0433235      0.0301288      0.0285529     \n+# C 0.0438042      0.200191       0.011345       0.0548488     \n+# G 0.0182829      0.00639368     0.103501       0.00476505    \n+# T 0.024887       0.0453133      0.00905532     0.169074      \n+\n+# delExistCost: 586\n+# insExistCost: 543\n+# delExtendCost: 32\n+# insExtendCost: 29\n+\n+# score matrix (query letters = columns, reference letters = rows):\n+#        A      C      G      T\n+# A     74    -69    -42    -94\n+# C    -68     70   -132    -35\n+# G    -70   -166    147   -180\n+# T    -99    -45   -132     88\n+\n+#last -X 0\n+#last -Q 0\n+#last -t4.80115\n+#last -a 29\n+#last -A 28\n+#last -b 2\n+#last -B 1\n+#last -s 
1\n+#last -S 1\n+# score matrix (query letters = columns, reference letters = rows):\n+       A      C      G      T\n+A      4     -3     -2     -5\n+C     -3      4     -7     -2\n+G     -3     -8      7     -9\n+T     -5     -2     -7      4\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/lastdb.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/lastdb.loc Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,6 @@
+# This is a test file distributed with the LAST wrapper for
+# defining a list of LAST databases used in functional
+# tests for LAST tools
+#
+humdb humdb Human mitochondrial genome ${__HERE__}/humdb
+hedgdb hedgdb Hedgehog human protein ${__HERE__}/hedgdb
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/outfile_convert.axt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outfile_convert.axt Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,16 @@\n+0 humanMito 585 14113 fuguMito 7 13715 + 22420\n+GTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC-ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCAC-CACGATCAAAAGGGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCAGTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCTGAACACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGT-AAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATAT----AGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAaagtgcacTTGGACGAACCAGAGTGTAGCTTAACA---CAAAGCACCCAACTTACACTTAGGAGAT----------TTCAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCACCT-------TACTACCAGACAACCTTAGCCAAACCAT--------TTACCCAAATAAAGTATAGGCGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAG-----------ATGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGC--TAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGA-TAGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACT----------------GTTAGTCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACAC--------------CCATAGTAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACC--CACTACCTAAAAAATCCCAAACATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTCC--GCATAAGCCTGCGTCAGATCAAAACACTGAACTGACAATTAACAGCCCAATA-------------------TCTACAATCAACCAACAAGT
CATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTA--CCCCGCCTGTTTACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCCCAGTGAC-ACATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCTTACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATGACACAGCAAGACGAGAAGACCCTATGGAGCTTTAATTTATTAATGCAAACAGTACCTAA---CAAACCCACAGGTCCTAAACTACCAAACCTGCATTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAacctccgagcagtaCATGCTAAGACTTCACCAGTCAAAGCG-------------------AACTACTATACTCAATTGATCCAATAAC-TTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTATCTACTTCA-AATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTACTTCACAA--AGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTA---TACCCACACCCACCCAAGAACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAaaactTTACAGTCAGAGGTTCAATTCCTCTTCTTAACAAC----------------ATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCACATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCCTTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTGGCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTTCGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATTATtataataAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGACGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGCTACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTAGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAA---GAAATATGTCTGATA-AAAGAGTTACTTTGATAGAGTAAATAATAGGAGCTTAAACCCCCTTATTTC--TAGGACTATGAGAATCGAACCC
ATC'..b'AAAACACCTGTAATATCAATACCTATAGTTGTTAAACTTACTGCTCTTATCGTCACAATCTTGGGACTCCTAATTGCCCTAGAATTAGCTTCCTTAACCTCCAAACAACTTAAACCTACACCACACCTGTCCCCCCACCACTTCTCAAACATACTTGGCTTCTTCCCAACAATTGTACACCGTGCCTCTCCTAAAATTAATCTCATTTTAGGACAAACAATTGCTACCCAAATTATCGACCTAACCTGACTAGAAAAAGTTGGACCCAAAACAATTTCATCTATCAACACTCCCCTCATCTCTACCATCAGTAACATCCAACAAGGATCAATCAAGACATAccttgtcctcttcctc\n+\n+1 humanMito 14433 16033 fuguMito 14030 15634 + 2873\n+GCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATAAATtaaaaaaactattaaacCCATATAACCTCCCCC-----AAAATTCAGAATAATAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCCTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATcttacccttcaTTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTcttccttctctccttaatGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATACTTCACAACAATCCTaatcctaatacCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATA-CACCAGTCTTGTAAAC-------CGGAGACGAAAACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAA
AGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCAT\n+GCCTTAGGATCCTTATCTGCAGACAGCGCTGacgaataaataaacacaaccaGTATACCCCCCATATAAATCATTAACAACACCAAAGACAAGAAAGTTCCCCCGTGCAAAACTGAAGCCCCACAGCAAAGAAGTGCAACCAGCACCAAATTG-----AAAACTCCATAAAAAGGAGCAGGATTTGTAGACAACACAATTATTACAACCAACATCcctaataaaagaaaaacaagcgcATAAAACATAGTTTCTGCCAGGATTTTAACCAGGACCTATGGCGTGAAAAACCATCGTTGTTACTCAACTACAAAAACACTAATGGCCAGCCTACGCAAAACCCACCCCCTACTAAAAATCGTAAACGACATAGTAATTGACCTTCCTACCCCCTCAAACATTTCCGCCTGATGAAACTTTGGCTCTCTACTCGGATTATGCCTTATTACACAAATCATCACAGGACTGTTCCTTGCAATACACTACACATCCGACATCTCTACCGCCTTTTCATCCGTAGCCCACATTTGCCGAGACGTAAACTACGGCTGACTAATTCGCAATCTACACGCAAACGGTGcctcattcttttttatttgcttaTACTCCCACATCGGCCGAGGTCTTTACTATGGCTCTTACCTAAGTAAAGAAACCTGAAACGTAGGGGTAGTCCTCTTACTTTTAGTAATGGCCACCGCTTTCGTAGGCTACGTTCTTCCATGAGGACAAATATCCTTCTGAGGCGCCACTGTAATTACAAACCTGCTCTCTGCTGTCCCCTACGTAGGAAACACGCTCGTTCAATGAGTATGAGGAGGCTTTTCAGTAGACAGCGCCACTCTAACACGATTCTTtgccttccacttcctcctcccatTTATCGTTGCAGCCGCTGCCATCGtacatcttatttttcttcacgAAACAGGCTCCAACAATCCCCTAGGACTCAATTCAAACGCAGACAAAATCCCATTCCACCCATACTTCTCTTACAAAGACCTCCTGGGCTTCACAATCATACTCTCAGCCCTCGCAACACTCGCCCTATTCTCTCCAAACTACCTCGGAGACCCTGACAACTTCACACCAGCCAATCCTCTAGTTACCCCCGCCCACATTAAACCAGAATGGTATTTCCTATTTGCATACGCAATTCTACGATCTATCCCCAATAAGCTAGGAGGTGTTCTGGCCCTTCTTGCCTCAATCTTAATTCTTATAGTAGTTCCTTTCTTACACACCTCTAAACAACGAAGCCTAACATTCCGCCCACTATCACAATTCCTATTCTGAACCCTAATTGCCGACGTCGTCATCCTAACCTGAATTGGAGGCATGCCCGTCGAACATCCTTACATTATTATCGGACAAATTGCCTCAGTACTTTACTTCTCTCTCTTCCTAATCTTGATGCCAATAGCCGGTTGACTAGAAAATAAAATACTAAACT-AACAAGCATTAGTAGCTCAGATTCAGAGCGTCGGTCTTGTAAACCGAATGTCGGGGGTTAAAATCCCCCCTTATGCTCAA---AAAGAAGGGACTTCAACCCCCACCACTGGCTCCCAAAGCCAGCATTCTTAATTAAACTACTTTTTGataatacat\n+\n+2 humanMito 16457 16527 fuguMito 15936 16006 + 178\n+GGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGCCATAAA\n+GGGACAATAACAGTGGGGGTTTCACTAAATGAACTATTCCTGGCATTTGGTTCCTACTTCAGGGCCATTAA\n+\n+3 humanMito 5827 5875 fuguMito 4681 4729 - 
118\n+TGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTT\n+TGGACAAGAGGGTTTTAACCTCTGTTTTTAGAATCACAATCTAATGTTT\n+\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/outfile_convert.blast
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outfile_convert.blast Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,1075 @@\n+Query= fuguMito\n+         (16447 letters)\n+\n+>humanMito\n+          Length = 16571\n+\n+ Score = 6.92e+03 bits (22420), Expect = 0\n+ Identities = 9042/13813 (65%), Gaps = 388/13813 (2%)\n+ Strand = Plus / Plus\n+\n+Query: 7   GTAGCTTAAC----CAAAGCAGAGTACTGAAGATGCTAAGATGGGCCCTGAAAAGTCCCG 62\n+           |||||||| |    |||||||    |||||| ||| | ||| |||| |   |    ||| \n+Sbjct: 585 GTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC-ACATCACCCCA 643\n+\n+Query: 63  CAGGCACAAAAGCTTGGTCCTGACTTTACTAACAACTCTGATCAAACTTACACATGCAAG 122\n+            |  || | | | ||||||||  | || |||  | |||| |  ||  |||||||||||||\n+Sbjct: 644 TAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAG 703\n+\n+Query: 123 TATCCGCATCCCAGTGAAaatgccccccgccccccgtcCGGAAATAGGGAGTTGGTATCA 182\n+            |||| | | |||||||     ||| |     | |   ||   | | ||     | ||||\n+Sbjct: 704 CATCCCCGTTCCAGTGAGTTCACCCTCTAAATCAC-CACGATCAAAAGGGACAAGCATCA 762\n+\n+Query: 183 GGCACACAAATTTGTAGCCCATGACACCTAGCTTTGCCACGCCCCCAAGGGAATTCAGCA 242\n+            |||| ||    || ||| ||  || | |||| | ||||| |||||| |||||  |||||\n+Sbjct: 763 AGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCA 821\n+\n+Query: 243 GTGATAAACATTAAGCCATAAGTGAAAACTTGACTTAGTTATGAT--CTAAAGAGTCGGT 300\n+           ||||| ||| || ||| ||||  ||||  || ||| || |||  |  |   || || |||\n+Sbjct: 822 GTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGT 881\n+\n+Query: 301 AAAACTCGTGCCAGCCACCGCGGTTATACGAGAGACCCAAGTTGTTAGCCAACGGCGTAA 360\n+            ||  ||||||||||||||||||| | ||||   ||||||||   |||    ||||||||\n+Sbjct: 882 CAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAA 941\n+\n+Query: 361 AGGGTGGTTAgaactaaaaacaacaaactgagACCGAACACCTTCAAGGCTGTTATACGC 420\n+           || ||| ||   |  |    |  |  | | |  |  ||   |  |   | ||| | |  |\n+Sbjct: 942 AGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAAC 1001\n+\n+Query: 421  TTCCGAAGCAACGAAGAACAATAACGAAAGTAGCCTCACTAACTC-GAACCCACGAAAGC 479\n+            | | |  |  || ||  | | || ||||||| || 
| |  |  || |||| ||| | |||\n+Sbjct: 1002 TCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCTGAACACACAATAGC 1060\n+\n+Query: 480  TAGGACACAAACTGGGATTAGATACCCCACTATGCCTACCCCTAAACACGATATG---AA 536\n+            || ||| |||||||||||||||||||||||||||| || ||||||||   |   |   ||\n+Sbjct: 1061 TAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAA 1120\n+\n+Query: 537  ACTACGTACATATCCGCCTGGTTACTACGAGCATTAGCTTAAAACCCAAAGGACTTGGCG 596\n+             | ||  |  |   |||| |   |||||||||   |||||||||| |||||||| |||||\n+Sbjct: 1121 TCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCG 1180\n+\n+Query: 597  GTGCTTTAAAACCATCTAGAGGAGCCTGTTTTAAAACCGATACTCCCCGTTCAACCTCAC 656\n+            |||||| | | || |||||||||||||||| |  || |||||  ||||| ||||||||||\n+Sbjct: 1181 GTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCAC 1240\n+\n+Query: 657  CCCTCCTTGTTTTAACCGCCTATATACCACCGTCGTCAGCCTACCCTG-TGAAGGGCAAA 715\n+            | |  |||| |    | ||||||||||| || || |||||  |||||| ||||||  | |\n+Sbjct: 1241 CACCTCTTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACA 1296\n+\n+Query: 716  TAGTAGACAAAATTGGCACAGCCAAAAACGTCAGGTCGAGGTGTAGCGAATGGAGGGGGA 775\n+             ||||  |  || |  |   |  ||| |||| ||||| |||||||||  |||  | || |\n+Sbjct: 1297 AAGTAAGCGCAAGTACCCACGT-AAAGACGTTAGGTCAAGGTGTAGCCCATGAGGTGGCA 1355\n+\n+Query: 776  CAAAATGGGCTACATTCTCTGCCTAGAGAA--CACGAAAGATGTGCTGAAATGCACAC-C 832\n+              |||||||||||||| ||| ||     ||   |||| ||   |  |||||   |     \n+Sbjct: 1356 AGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTATGAAACTTAAGGGT 1415\n+\n+Query: 833  CGAAGGAGGATTTAGCAGTAAGCAAGAAATAGAGTGTCATGCTGAAACCGGCTATGAAGC 892\n+            |||||| |||||||||||||| |    | |||||||    | ||||   |||  ||||||\n+Sbjct: 1416 CGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGC 1475\n+\n+Query: 893  GCGCACACACCGCCCGTCACTCTCCCCAAActctt---------aatttaaaaataacta 943\n+            ||| |||||||||||||||| |||| |||                ||||||  | |||  \n+Sbjct: 1476 GCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCC 
1535\n+\n+Query: 944  ATAAGCCAccaaaagaaaaggggaggcAAGTCGTAACATGGTAAGTGTACCGGAAGGT'..b'  |||||||||||  |||\n+Sbjct: 15208 CATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACAGTCCCA 15267\n+\n+Query: 14865 CTCTAACACGATTCTTtgccttccacttcctcctcccatTTATCGTTGCAGCCGCTGCCA 14924\n+             | || |||||||||||| |||| |||||| || | || || ||  ||||||||   ||  \n+Sbjct: 15268 CCCTCACACGATTCTTTACCTTTCACTTCATcttacccttcaTTATTGCAGCCCTAGCAG 15327\n+\n+Query: 14925 TCGtacatcttatttttcttcacgAAACAGGCTCCAACAATCCCCTAGGACTCAATTCAA 14984\n+                | || ||  | ||  | |||||||| || || ||||| ||||||||| |||  ||  \n+Sbjct: 15328 CACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCC 15387\n+\n+Query: 14985 ACGCAGACAAAATCCCATTCCACCCATACTTCTCTTACAAAGACCTCCTGGGCTTCACAA 15044\n+             |  | || |||||| | |||||||| |||| | |   |||||||  ||| |||||     \n+Sbjct: 15388 ATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTC 15447\n+\n+Query: 15045 TCATACTCTCAGCCCTCGCAACACTCGCCCTATTCTCTCCAAACTACCTCGGAGACCCTG 15104\n+             || | ||     || |    ||| |  | |||||||| ||| ||  ||| || ||||| |\n+Sbjct: 15448 TcttccttctctccttaatGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAG 15507\n+\n+Query: 15105 ACAACTTCACACCAGCCAATCCTCTAGTTACCCCCGCCCACATTAAACCAGAATGGTATT 15164\n+             |||| |  || | |||||| ||  ||   |||||  ||||||| || || ||||| ||||\n+Sbjct: 15508 ACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATT 15567\n+\n+Query: 15165 TCCTATTTGCATACGCAATTCTACGATCTATCCCCAATAAGCTAGGAGGTGTTCTGGCCC 15224\n+             ||||||| || ||| ||||||| |||||  |||| || || |||||||| || || ||||\n+Sbjct: 15568 TCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCC 15627\n+\n+Query: 15225 TTCTTGCCTCAATCTTAATTCTTATAGTAGTTCCTTTCTTACACACCTCTAAACAACGAA 15284\n+             |  |    || ||| | || ||   | || | ||  || | || |  || ||||||| ||\n+Sbjct: 15628 TATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAA 15687\n+\n+Query: 15285 GCCTAACATTCCGCCCACTATCACAATTCCTATTCTGAACCCTAATTGCCGACGTCGTCA 15344\n+             || ||| ||| 
|||||||||   ||||  || |  |||  ||||   || ||| || |||\n+Sbjct: 15688 GCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCA 15747\n+\n+Query: 15345 TCCTAACCTGAATTGGAGGCATGCCCGTCGAACATCCTTACATTATTATCGGACAAATTG 15404\n+             | ||||||||||| |||||    || ||     | ||||  |  || || |||||| | |\n+Sbjct: 15748 TTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAG 15807\n+\n+Query: 15405 CCTCAGTACTTTACTTCTCTCTCTTCCTAATCTTGATGCCAATAGCCGGTTGACTAGAAA 15464\n+             | || ||||| |||||| |     |||||||| | || ||||    |     | | ||||\n+Sbjct: 15808 CATCCGTACTATACTTCACAACAATCCTaatcctaatacCAACTATCTCCCTAATTGAAA 15867\n+\n+Query: 15465 ATAAAATACTAAACT-AACAAGCATTAGTAGCTCAGATTCAGAGCGTCGGTCTTGTAAAC 15523\n+             | |||||||| || |   |  |   | ||||   | | | | | |  | |||||||||||\n+Sbjct: 15868 ACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATA-CACCAGTCTTGTAAAC 15926\n+\n+Query: 15524 CGAATGTCGGGGGTTAAAATCCCCCCTTATGCTCAA---AAAGAAGGGACTTCAACCCCC 15580\n+                    ||| |   |||| |       | |  |||   | ||||      |  | | ||\n+Sbjct: 15927 -------CGGAGACGAAAACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCC 15979\n+\n+Query: 15581 ACCACTGGCTCCCAAAGCCAGCATTCTTAATTAAACTACTTTTTGataatacat 15634\n+             |||| | || |||||||| |  ||||| | |||||||| | | || |  | |||\n+Sbjct: 15980 ACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCAT 16033\n+\n+>humanMito\n+          Length = 16571\n+\n+ Score = 57.2 bits (178), Expect = 3.3e-09\n+ Identities = 53/71 (74%)\n+ Strand = Plus / Plus\n+\n+Query: 15936 GGGACAATAACAGTGGGGGTTTCACTAAATGAACTATTCCTGGCATTTGGTTCCTACTTC 15995\n+             ||| | |||||| | |||| |     || |||||| |  | | ||| |||||||||||||\n+Sbjct: 16457 GGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTC 16516\n+\n+Query: 15996 AGGGCCATTAA 16006\n+             |||||||| ||\n+Sbjct: 16517 AGGGCCATAAA 16527\n+\n+>humanMito\n+          Length = 16571\n+\n+ Score = 38.7 bits (118), Expect = 0.0012\n+ Identities = 35/49 (71%)\n+ Strand = Minus / Plus\n+\n+Query: 11767 
TGGACAAGAGGGTTTTAACCTCTGTTTTTAGAATCACAATCTAATGTTT 11719\n+             |||  || || |   ||||| |||| |||||| | ||| || |||| ||\n+Sbjct: 5827  TGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTT 5875\n+\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/outfile_convert.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outfile_convert.html Wed Jun 17 14:50:21 2020 -0400
b
b'@@ -0,0 +1,1085 @@\n+\n+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"\n+ "http://www.w3.org/TR/html4/strict.dtd">\n+<html lang="en"><head>\n+<meta http-equiv="Content-type" content="text/html; charset=UTF-8">\n+<title>Reliable Alignments</title>\n+<style type="text/css">\n+/* Try to force monospace, working around browser insanity: */\n+pre {font-family: "Courier New", monospace, serif; font-size: 0.8125em}\n+.a {background-color: #3333FF}\n+.b {background-color: #9933FF}\n+.c {background-color: #FF66CC}\n+.d {background-color: #FF3333}\n+.e {background-color: #FF9933}\n+.f {background-color: #FFFF00}\n+.key {display:inline; margin-right:2em}\n+</style>\n+</head><body>\n+\n+<div style="line-height:1">\n+<pre class="key"><span class="a">  </span> prob &gt; 0.999</pre>\n+<pre class="key"><span class="b">  </span> prob &gt; 0.99 </pre>\n+<pre class="key"><span class="c">  </span> prob &gt; 0.95 </pre>\n+<pre class="key"><span class="d">  </span> prob &gt; 0.9  </pre>\n+<pre class="key"><span class="e">  </span> prob &gt; 0.5  </pre>\n+<pre class="key"><span class="f">  </span> prob &le; 0.5  </pre>\n+</div>\n+\n+<h3>Alignment score=22420, expect=0:</h3>\n+<pre>\n+humanMito 585 GTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC-ACATCACCCCA 643\n+fuguMito    7 GTAGCTTAAC----CAAAGCAGAGTACTGAAGATGCTAAGATGGGCCCTGAAAAGTCCCG  62\n+              ******** *    *******    ****** *** * *** **** *   *    *** \n+\n+humanMito 644 TAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAG 703\n+fuguMito   63 CAGGCACAAAAGCTTGGTCCTGACTTTACTAACAACTCTGATCAAACTTACACATGCAAG 122\n+               *  ** * * * ********  * ** ***  * **** *  **  *************\n+\n+humanMito 704 CATCCCCGTTCCAGTGAGTTCACCCTCTAAATCAC-CACGATCAAAAGGGACAAGCATCA 762\n+fuguMito  123 TATCCGCATCCCAGTGAAaatgccccccgccccccgtcCGGAAATAGGGAGTTGGTATCA 182\n+               **** * * *******     *** *     * *   **   * * **     * ****\n+\n+humanMito 763 AGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAA-CAGCA 
821\n+fuguMito  183 GGCACACAAATTTGTAGCCCATGACACCTAGCTTTGCCACGCCCCCAAGGGAATTCAGCA 242\n+               **** **    ** *** **  ** * **** * ***** ****** *****  *****\n+\n+humanMito 822 GTGATTAACCTTTAGCAATAAACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGT 881\n+fuguMito  243 GTGATAAACATTAAGCCATAAGTGAAAACTTGACTTAGTTATGAT--CTAAAGAGTCGGT 300\n+              ***** *** ** *** ****  ****  ** *** ** ***  *  *   ** ** ***\n+\n+humanMito 882 CAATTTCGTGCCAGCCACCGCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAA 941\n+fuguMito  301 AAAACTCGTGCCAGCCACCGCGGTTATACGAGAGACCCAAGTTGTTAGCCAACGGCGTAA 360\n+               **  ******************* * ****   ********   ***    ********\n+\n+humanMito 942 AGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAAC 1001\n+fuguMito  361 AGGGTGGTTAgaactaaaaacaacaaactgagACCGAACACCTTCAAGGCTGTTATACGC  420\n+              ** *** **   *  *    *  *  * * *  *  **   *  *   * *** * *  *\n+\n+humanMito 1002 TCCAGTTGACACAAAATAGACTA-CGAAAGTGGCTTTAACATATCTGAACACACAATAGC 1060\n+fuguMito   421 TTCCGAAGCAACGAAGAACAATAACGAAAGTAGCCTCACTAACTC-GAACCCACGAAAGC  479\n+               * * *  *  ** **  * * ** ******* ** * *  *  ** **** *** * ***\n+\n+humanMito 1061 TAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAA 1120\n+fuguMito   480 TAGGACACAAACTGGGATTAGATACCCCACTATGCCTACCCCTAAACACGATATG---AA  536\n+               ** *** **************************** ** ********   *   *   **\n+\n+humanMito 1121 TCAACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCG 1180\n+fuguMito   537 ACTACGTACATATCCGCCTGGTTACTACGAGCATTAGCTTAAAACCCAAAGGACTTGGCG  596\n+                * **  *  *   **** *   *********   ********** ******** *****\n+\n+humanMito 1181 GTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCAC 1240\n+fuguMito   597 GTGCTTTAAAACCATCTAGAGGAGCCTGTTTTAAAACCGATACTCCCCGTTCAACCTCAC  656\n+               ****** * * ** **************** *  ** *****  ***** **********\n+\n+humanMito 1241 CACCTCTTGCT----CAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACA 1296\n+fuguMito   657 
CCCTCCTTGTTTTAACCGCCTATATACCACCGTCGTCAGCCTACCCTG-TGAAGGGCAAA  715\n+               * *  **** *    * ***********'..b'4\n+                * *** * ** *     ** ********* * **********  ***********  ***\n+\n+humanMito 15268 CCCTCACACGATTCTTTACCTTTCACTTCATcttacccttcaTTATTGCAGCCCTAGCAG 15327\n+fuguMito  14865 CTCTAACACGATTCTTtgccttccacttcctcctcccatTTATCGTTGCAGCCGCTGCCA 14924\n+                * ** ************ **** ****** ** * ** ** **  ********   **  \n+\n+humanMito 15328 CACTCCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCC 15387\n+fuguMito  14925 TCGtacatcttatttttcttcacgAAACAGGCTCCAACAATCCCCTAGGACTCAATTCAA 14984\n+                   * ** **  * **  * ******** ** ** ***** ********* ***  **  \n+\n+humanMito 15388 ATTCCGATAAAATCACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTC 15447\n+fuguMito  14985 ACGCAGACAAAATCCCATTCCACCCATACTTCTCTTACAAAGACCTCCTGGGCTTCACAA 15044\n+                *  * ** ****** * ******** **** * *   *******  *** *****     \n+\n+humanMito 15448 TcttccttctctccttaatGACATTAACACTATTCTCACCAGACCTCCTAGGCGACCCAG 15507\n+fuguMito  15045 TCATACTCTCAGCCCTCGCAACACTCGCCCTATTCTCTCCAAACTACCTCGGAGACCCTG 15104\n+                ** * **     ** *    *** *  * ******** *** **  *** ** ***** *\n+\n+humanMito 15508 ACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACATCAAGCCCGAATGATATT 15567\n+fuguMito  15105 ACAACTTCACACCAGCCAATCCTCTAGTTACCCCCGCCCACATTAAACCAGAATGGTATT 15164\n+                **** *  ** * ****** **  **   *****  ******* ** ** ***** ****\n+\n+humanMito 15568 TCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTAGGAGGCGTCCTTGCCC 15627\n+fuguMito  15165 TCCTATTTGCATACGCAATTCTACGATCTATCCCCAATAAGCTAGGAGGTGTTCTGGCCC 15224\n+                ******* ** *** ******* *****  **** ** ** ******** ** ** ****\n+\n+humanMito 15628 TATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAACAACAAA 15687\n+fuguMito  15225 TTCTTGCCTCAATCTTAATTCTTATAGTAGTTCCTTTCTTACACACCTCTAAACAACGAA 15284\n+                *  *    ** *** * ** **   * ** * **  ** * ** *  ** ******* **\n+\n+humanMito 15688 
GCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCA 15747\n+fuguMito  15285 GCCTAACATTCCGCCCACTATCACAATTCCTATTCTGAACCCTAATTGCCGACGTCGTCA 15344\n+                ** *** *** *********   ****  ** *  ***  ****   ** *** ** ***\n+\n+humanMito 15748 TTCTAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAG 15807\n+fuguMito  15345 TCCTAACCTGAATTGGAGGCATGCCCGTCGAACATCCTTACATTATTATCGGACAAATTG 15404\n+                * *********** *****    ** **     * ****  *  ** ** ****** * *\n+\n+humanMito 15808 CATCCGTACTATACTTCACAACAATCCTaatcctaatacCAACTATCTCCCTAATTGAAA 15867\n+fuguMito  15405 CCTCAGTACTTTACTTCTCTCTCTTCCTAATCTTGATGCCAATAGCCGGTTGACTAGAAA 15464\n+                * ** ***** ****** *     ******** * ** ****    *     * * ****\n+\n+humanMito 15868 ACAAAATACTCAAATGGGCCTGTCCTTGTAGTATAAACTAATA-CACCAGTCTTGTAAAC 15926\n+fuguMito  15465 ATAAAATACTAAACT-AACAAGCATTAGTAGCTCAGATTCAGAGCGTCGGTCTTGTAAAC 15523\n+                * ******** ** *   *  *   * ****   * * * * * *  * ***********\n+\n+humanMito 15927 -------CGGAGACGAAAACCTTTTTCCAAGGACAAATCAGAGAAAAAGTCTTTAACTCC 15979\n+fuguMito  15524 CGAATGTCGGGGGTTAAAATCCCCCCTTATGCTCAA---AAAGAAGGGACTTCAACCCCC 15580\n+                       *** *   **** *       * *  ***   * ****      *  * * **\n+\n+humanMito 15980 ACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCAT 16033\n+fuguMito  15581 ACCACTGGCTCCCAAAGCCAGCATTCTTAATTAAACTACTTTTTGataatacat 15634\n+                **** * ** ******** *  ***** * ******** * * ** *  * ***\n+\n+</pre>\n+<h3>Alignment score=178, expect=3.3e-09:</h3>\n+<pre>\n+humanMito 16457 GGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTC 16516\n+fuguMito  15936 GGGACAATAACAGTGGGGGTTTCACTAAATGAACTATTCCTGGCATTTGGTTCCTACTTC 15995\n+                *** * ****** * **** *     ** ****** *  * * *** *************\n+\n+humanMito 16517 AGGGCCATAAA 16527\n+fuguMito  15996 AGGGCCATTAA 16006\n+                ******** **\n+\n+</pre>\n+<h3>Alignment score=118, expect=0.0012:</h3>\n+<pre>\n+humanMito  5827 
TGGTAAAAAGAGGCCTAACCCCTGTCTTTAGATTTACAGTCCAATGCTT  5875\n+fuguMito  11767 TGGACAAGAGGGTTTTAACCTCTGTTTTTAGAATCACAATCTAATGTTT 11719\n+                ***  ** ** *   ***** **** ****** * *** ** **** **\n+\n+</pre>\n+</body></html>\n'
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/outfile_convert.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/outfile_convert.tab Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,35 @@
+# LAST version 1021
+#
+# a=29 b=2 A=28 B=1 e=106 d=63 x=105 y=48 z=105 D=1e+06 E=2.9693e+07
+# R=10 u=2 s=2 S=0 M=0 T=0 m=10 l=1 L=999999999 n=10 k=1 w=1000 t=4.80115 j=3 Q=0
+# ref_db
+# Reference sequences=1 normal letters=16571
+# lambda=0.213867 K=0.208
+#
+#     A   C   G   T   M   S   K   W   R   Y   B   D   H   V
+# A   4  -3  -2  -5   2  -3  -4   1   2  -4  -3   1   0   1
+# C  -3   4  -7  -2   2   2  -3  -2  -4   2   1  -3   1   1
+# G  -3  -8   7  -9  -5   3   3  -5   3  -9   1   1  -6   1
+# T  -5  -2  -7   4  -3  -3   2   1  -6   2   1   0   1  -4
+# M   2   2  -4  -3   2   0  -3   0   0   0  -1  -1   0   1
+# S  -3   2   3  -4   0   2   0  -3   0   0   1  -1  -1   1
+# K  -4  -3   2   2  -4   0   2   0  -1   0   1   1  -1  -1
+# W   1  -2  -4   1   0  -3   0   1   0   0  -1   1   0  -1
+# R   2  -4   3  -6   0   0  -1   0   3  -5  -2   1  -1   1
+# Y  -4   2  -7   2   0   0   0   0  -5   2   1  -1   1  -1
+# B  -4   1   1   1  -1   1   1  -1  -2   1   1   0   0  -1
+# D   1  -3   1   0  -1  -1   1   1   1  -1   0   1   0   0
+# H   0   1  -5   1   0  -1  -1   0  -1   1   0   0   0   0
+# V   1   1   1  -4   1   1  -1  -1   1  -1  -1   0   0   1
+#
+# Coordinates are 0-based.  For - strand matches, coordinates
+# in the reverse complement of the 2nd sequence are used.
+#
+# name start alnSize strand seqSize alignment
+#
+# batch 0
+22420 humanMito 584 13529 + 16571 fuguMito 6 13709 + 16447 10,4:0,34,0:1,106,0:1,78,0:1,50,2:0,156,0:1,21,1:0,69,3:0,133,0:4,33,1:0,33,0:1,67,2:0,26,1:0,96,9:0,30,0:4,71,0:3,28,0:10,54,0:7,28,0:8,30,1:0,38,0:11,73,1:0,62,0:2,96,0:1,20,4:0,40,0:16,71,0:14,27,2:0,25,0:2,39,3:0,72,0:2,28,2:0,22,0:19,103,0:2,39,1:0,28,0:1,210,0:3,113,0:19,28,0:1,204,0:1,48,0:2,42,0:3,50,1:0,21,1:0,32,0:16,957,0:3,15,0:1,51,0:2,57,3:0,99,0:1,415,3:0,606,0:7,45,0:2,11,5:0,76,0:1,62,0:1,18,0:1,11,0:2,72,2:0,27,0:2,36,0:2,15,11:0,1532,0:9,71,0:2,26,0:3,42,0:7,697,7:0,16,5:0,57,3:0,95,0:1,48,4:0,102,0:3,1382,0:2,16,0:2,50,0:1,9,0:3,395,0:5,23,12:0,13,0:3,22,0:9,350,10:0,24,0:8,17,0:5,1285,0:7,14,0:2,30,0:5,25,0:1,25,0:1,86,0:2,41,0:13,541,0:3,880,0:3,189,0:3,65 EG2=0 E=0
+2873 humanMito 14432 1601 + 16571 fuguMito 14029 1605 + 16447 104,0:5,44,5:0,1297,1:0,27,0:1,16,0:7,29,3:0,75 EG2=3e-250 E=1.3e-259
+178 humanMito 16456 71 + 16571 fuguMito 15935 71 + 16447 71 EG2=6.1 E=3.3e-09
+118 humanMito 5826 49 + 16571 fuguMito 4680 49 - 16447 49 EG2=2.3e+06 E=0.0012
+# Query sequences=1
b
diff -r 000000000000 -r 9a7e91fc6562 test-data/tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool_data_table_conf.xml.test Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="lastdb" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/lastdb.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 9a7e91fc6562 tool-data/lastdb.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/lastdb.loc.sample Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,4 @@
+# This is a sample file distributed with the LAST wrapper that enables tools to
+# use a directory of LAST databases.
+#
+#humdb humdb Human mitochondrial genome /some/path/humdb
b
diff -r 000000000000 -r 9a7e91fc6562 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jun 17 14:50:21 2020 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="lastdb" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/lastdb.loc" />
+    </table>
+</tables>