Repository 'sina'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/sina

Changeset 0:a21965c8bcf1 (2019-10-23)
Next changeset 1:fd62f1b940f6 (2019-10-23)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sina commit 776b362db87d508ca44983eae02dcd4f119b95e8"
added:
README.md
macros.xml
sina.xml
test-data/output_fasta_arb.arb
test-data/output_fasta_fasta.arb
test-data/reference.arb
test-data/reference.fasta
test-data/sequence.fasta
test-data/test_sina_references.loc
tool-data/sina_references.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r a21965c8bcf1 README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Wed Oct 23 18:28:17 2019 -0400
[
@@ -0,0 +1,8 @@
+**Add reference libraries**
+
+[Reference databases](https://www.arb-silva.de/download/arb-files/) provided by the [SILVA project](https://www.arb-silva.de/) can be used within this wrapper by adding the corresponding files to the tool-data directory and editing `tool-data/sina_references.loc.sample` as follows:
+
+    LSU_Parc LSU_Parc ${__HERE__}/SILVA_132_LSUParc_12_12_17_opt.arb
+    LSU_Ref LSU_Ref ${__HERE__}/SILVA_132_LSURef_07_12_17_opt.arb
+    SSU_Ref SSU_Ref ${__HERE__}/SILVA_132_SSURef_12_12_17_opt.arb
+    Ref_NR_99 Ref_NR_99 ${__HERE__}/SILVA_132_SSURef_NR99_13_12_17_opt.arb
\ No newline at end of file
b
diff -r 000000000000 -r a21965c8bcf1 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Oct 23 18:28:17 2019 -0400
b
@@ -0,0 +1,10 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.5.0</token>
+    <token name="@WRAPPER_VERSION@">galaxy0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">sina</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r a21965c8bcf1 sina.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sina.xml Wed Oct 23 18:28:17 2019 -0400
[
b'@@ -0,0 +1,368 @@\n+<tool id="sina" name="SINA" version="@TOOL_VERSION@+@WRAPPER_VERSION@">\n+    <description>reference based multiple sequence alignment</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements"/>\n+    <stdio></stdio>\n+    <command detect_errors="exit_code"><![CDATA[\n+        ## initialize\n+        ## parse custom reference from fasta to arb file format\n+\n+        #if $db.select == \'custom\'\n+            #if $db.custom.ext == \'fasta\'\n+                sina -i \'$db.custom\' --prealigned -o ./reference.arb &&\n+            #else\n+                ln -s \'$db.custom\' ./reference.arb && \n+            #end if\n+        #elif $db.select == \'local\'\n+            ln -s \'$db.arb_databases.fields.path\' ./reference.arb &&\n+        #end if\n+\n+        ## run\n+        sina\n+            --in=\'$in\'\n+            --db=./reference.arb\n+            -p \\${GALAXY_SLOTS:-4}\n+            --num-pts=\\${GALAXY_SLOTS:-4}\n+\n+            #if $os.type == \'arb\'\n+                --outtype=\'arb\'\n+                --out=output.arb\n+            #elif $os.type == \'fasta\'\n+                --outtype=\'fasta\'\n+                --out=output.fasta\n+                --min-idty=$os.minidty\n+            #elif $os.type == \'fasta_meta\'\n+                --outtype=\'fasta\'\n+                --out=output.fasta\n+                --min-idty=$os.minidty\n+                --meta-fmt=\'csv\'\n+            #end if\n+            #if $log == \'yes\'\n+                --log-file=$logfile\n+            #elif $log == \'yes_meta\'\n+                --log-file=$logfile\n+                --show-diff\n+            #end if\n+\n+            ## Advanced alignment parameters\n+            --overhang=$ap.overhang\n+            --insertion=$ap.insertion\n+            #if $ap.turn == \'yes\'\n+                -t\n+            #elif $ap.turn == \'all\'\n+                -t all\n+            #end if\n+\n+          
  ## Expert Alignment Parameters\n+            --fs-min=$eap.fsmin\n+            --fs-max=$eap.fsmax\n+            --fs-msc=$eap.fsmsc\n+            --fs-weight=$eap.fsweight\n+            --fs-req=$eap.fsreq\n+            --fs-req-full=$eap.fsreqfull\n+            --fs-full-len=$eap.fsfulllen\n+            --gene-start=$eap.genestart\n+            --gene-end=$eap.geneend\n+            --fs-cover-gene=$eap.fscovergene\n+            --match-score=$eap.matchscore\n+            --mismatch-score=$eap.mismatchscore\n+            --pen-gap=$eap.pengap\n+            --pen-gapext=$eap.pengapext\n+            --fs-kmer-len=$eap.fskmerlen\n+            --fs-kmer-mm=$eap.fskmermm\n+            $eap.fskmernofast\n+            $eap.fskmernorel\n+\n+            ## Advanced search and classification parameters\n+            #if $asacp.activate == \'yes\'\n+                --search\n+                --search-kmer-candidates=$asacp.searchkmercandidates\n+                --lca-quorum=$asacp.lcaquorum\n+                --search-kmer-len=$asacp.searchkmerlen\n+                --search-kmer-mm=$asacp.searchkmermm\n+                --search-max-result=$asacp.searchmaxresult\n+                $asacp.searchnofast\n+                $asacp.searchkmernorel\n+            #end if\n+\n+            ## convert meta file to tabular\n+            #if $os.type == \'fasta_meta\'\n+                && cat output.csv | sed \'s/,/\\t/g\' > output.tsv\n+            #end if\n+    ]]></command>\n+    <!-- Sections and default parameters are based on https://www.arb-silva.de/aligner -->\n+    <inputs>\n+        <param argument="--in" type="data" format="fasta" multiple="false" label="Sequence file" help="FASTA file format"/>\n+        <conditional name="db">\n+            <param name="select" type="select" label="Reference library type" help="">\n+                <option value="custom" selected="true">Custom</option>\n+                <option value="local">Local cached</option>\n+            </param>\n+      
      <when value="custom">\n+                <param name="custom" type="data" format="data" label="Reference library file" help="FASTA or ARB file fo'..b'   <param name="pengap" value="6"/>\n+                <param name="pengapext" value="1"/>\n+                <param name="fskmerlen" value="9"/>\n+                <param name="fskmermm" value="1"/>\n+                <param name="fskmernofast" value="--fs-kmer-no-fast"/>\n+                <param name="fskmernorel" value=""/>\n+            </section>\n+            <section name="asacp">\n+                <param name="searchkmercandidates" value="1001"/>\n+                <param name="lcaquorum" value="0.9"/>\n+                <param name="searchkmerlen" value="9"/>\n+                <param name="searchkmermm" value="1"/>\n+                <param name="searchnofast" value=""/>\n+                <param name="searchkmernorel" value="--search-kmer-norel"/>\n+                <param name="searchmaxresult" value="10"/>\n+            </section>\n+            <output name="output_fasta">\n+                <assert_contents>\n+                    <has_line line=">FJ203641.1"/>\n+                    <has_line line=">WH051F03"/>\n+                    <has_n_lines n="34"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- #6 in: *.fasta; db: <arb_databases>.arb; out: *.fasta; standard parameters -->\n+        <test>\n+            <param name="in" value="sequence.fasta"/>\n+            <conditional name="db">\n+                <param name="select" value="local"/> \n+                <param name="arb_databases" value="testarb"/>\n+            </conditional>\n+            <section name="os">\n+                <param name="type" value="arb"/>\n+            </section>\n+            <output name="output_arb" file="output_fasta_arb.arb" compare="sim_size" delta="1000"/>\n+        </test>\n+    </tests>\n+\n+    <help><![CDATA[\n+.. 
class:: infomark\n+\n+**What it does**\n+\n+SINA aligns nucleotide sequences to match a pre-existing MSA using a graph-based alignment algorithm similar to PoA. The graph approach allows SINA to incorporate information from many reference sequences without blurring highly variable regions. While pure NAST implementations are highly dependent on finding a good match in the reference database, SINA is able to align sequences relatively distant to references with good quality and will yield a robust result for query sequences with many close references.\n+\n+While adding sequences to an MSA with SINA is usually faster than re-computing the entire MSA from an augmented set of unaligned sequences, the primary benefit lies in protecting investments made into the original MSA such as manual curation of the alignment, compute-intensive phylogenetic tree reconstruction and taxonomic annotation of the resulting phylogeny. Additionally, SINA includes a homology search which uses the previously computed alignment to determine the most similar sequences.  Based on the search results, a LCA-based classification of the query sequence can be computed using taxonomic classifications assigned to the sequences comprising the reference MSA.\n+\n+SINA is used to compute the large and small subunit ribosomal RNA alignments provided by the `SILVA project <https://www.arb-silva.de/>`_ and is able to use the `ARB format reference databases <https://www.arb-silva.de/download/arb-files/>`_ released by the project.\n+\n+**Input**\n+\n+SINA requires sequences in FASTA file format, whereas libraries can be also provided as ARB files. Furthermore, reference databases can be added as data tables. See README.md for more information.\n+\n+**Output**\n+\n+Results are provided in FASTA or ARB file format, whereas additional metadata is provided as CSV.\n+\n+.. 
class:: infomark\n+\n+**References**\n+\n+More information can be found on the `project website <https://sina.readthedocs.io/en/latest>`_, and on `github <https://github.com/epruesse/SINA>`_.  An `online version <https://www.arb-silva.de/aligner>`_ of SINA is provided by the SILVA project.\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1093/bioinformatics/bts252</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r a21965c8bcf1 test-data/output_fasta_arb.arb
b
Binary file test-data/output_fasta_arb.arb has changed
b
diff -r 000000000000 -r a21965c8bcf1 test-data/output_fasta_fasta.arb
b
Binary file test-data/output_fasta_fasta.arb has changed
b
diff -r 000000000000 -r a21965c8bcf1 test-data/reference.arb
b
Binary file test-data/reference.arb has changed
b
diff -r 000000000000 -r a21965c8bcf1 test-data/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta Wed Oct 23 18:28:17 2019 -0400
b
b'@@ -0,0 +1,282 @@\n+>ThfPende Thermofilum pendens\n+--------------------------------------ACUCCGGUU-GAU-CCUGCCGGACCCG-ACC-GC-UAUC-GGG-GUGG--GGCUAACCCAU-GGAAGU-CUA-GGAG-CC-------------------------------------------------------GGG---------------------------------GCUACG---------------------------------GCC---------------------------------------------------------GG--CUC-CGG-C-GG--A-CGGCUCAGU---AGC-ACGUGGC-UAA--CCU-AC-CCUCGGGA-GG--GGGAU-AA-CCCCG-G-GAAA-CUG-GGG-AUAAACCC-C-CAU-A------G-GCG-C--G-GA--CAC-------------------------------------------------------------------------------------------CUG-GAAU-----------------------------------------------------------------------------------GGG--U-C--C--GC-GCU-GAAA-GGG-C-CA-C----G-GUA-----CCAUGCU------UA--CC---GUGG-CC-G--C----------CC----------GAGG---AUG-G-GGCUGCGC-CCU--A------UC-A-G-GU-A-G---UUGGCG-GG-G-UAAC-GG-CCCGCCAA-GCCG-AUA-ACGGGUGGGGGCCGU-G-AG-------------A-GCGG-GAGC-CCCGAGAUGGGCA-CUGA-GA-CAAG-GGCCCAG-GCC-C-UACG--GGGU-GC-A-CC-A-G-GGGCG-AAACUUCCG--------------------CAAU-GCGG-GAAA-CCGUGACGGAGUCACCCCGAG-UGC-CA-C-----C-CG----AA-G-----AGG--G-------------------------UGGCUUUU-GC-CCGG-U-------------------------------C-----U-AA-AA----A--------------------------G-C-CG--G-GCG---------AA-UAAGC-GGGGGGCAAGC-UUGGU---GUCAGCCGCCG-CGGUAAU-AC-CAA-CCCCGCG-A-GUG-GUCGG-GACGUUUAUUGGGCCUAAAGCGUCCG-UAG-C-CGGCCC-GGUAAG-UCCC-UCCUU-AAA-GC-CCACGG-CUC-AA-CCGUGG-GAG---------------------------------------------CG-G-A----------------------------------------------GGGA-UAC-UGCCG-GGC-U-A-G-GG-GG-----CGGGAG-AGGCC-GGG-GGUACU-CCU--------------------GGG-GUA-GGG-GCGAAA-UC-C-UAUAAU-CCCAGGAGG-ACC-A-CCAG--U-G-GCGAAGGCGC--CCGGCUA------GCACG--------------------------------------------------------------CGC-CC--GACG-----GUGA-GG--GACGAAA-G-C-UGGGG-GAGC-AAAGGGG-AUUAGAUAC-----CCCCGUAG-UCCCA-GCUGUAAACGAUG-CGGG-CUAGGUG-UUGGACGG--GC-UUCGA--------------------------------GCC-CGUCCAG-UGCC-GUA-GGGAAGCCGU-UAA-GCCC-GCCGCCUG-GGGAGUACGGCC-GCAA-GGCU-GAA-ACUU-AAA-GGAAUUG-GCGGGGG-AGC--ACCA
--CA-A-GGG-GUGAAG-CU-UGCGGU-UUAAUUG-G-AGUCAACG-CCGGA-AA-CCUUA-CCGGGGG---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------CGACAGC-AGGAUGAAGGCCAGGCUGACGACCUUGCC-AGA-CGAGCUGAGAGGA-GGUGCAUGGCCGUCGCCG-GCUCGUGCC-GUGA-GGUGU-CCUG-UUAA-GU-CAGGGAAC-GAGCGAGACC-CCCG-CC--CCUAG---U-U-GC-U-AC-CC--A--GCCC--------------------------UUCG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GGG---C-UGGGG-CACU-C-U-AGG-GGGAC-U-GCCG-GC-G-A-UAA-GCC-G--GAGG-A-AGG-U-GGGG-GCUACGGC--AGGUCAGUAU-G-CC-CCGAAAC--CC-C-C-GGGCUACACGCGAGCUGCAAUGG-CGGGGA--C-AGC-GGGU-U-CC-G-A-C-CCCG-AA-AGGG-G-----------GA-G-GUAA-UCC-CU-U-AAACCCC-GCC-UCAGUAGGAA-UCGAGGGC-U-GCAACUC-GCCCUCG-UGAACGUGGAAU-CCCUAGUA-ACCGC-GUG-UC-ACC-AA-CGCGCGGUG-AAU-ACGUC-CCUGCUCCUUGCA----CACACCGCCCGUC-GCU--CCACCCGA-GGGAGGCCUAGGU-GA-G-GCCU-C-C-U-GC----C----------------------------------GACG-A---------------------------------------G-GU-G-GGAGGU-CGAACCUGGGCCUCC-CA-AGG-GGGGAG-AAGUCGUAACAAGGUGGCCGUAGGGGAACCUG-CGGC-CGGAUCACCUCC-------------------------------------------------------------------------------------------\n+>ThpTenax Thermoproteus 
tenax\n+--------------------------------------AAACCGGUU-GAU-CCUGCCGGACC-UGACC-GC-UAUC-GGG-GUGG--GGCUAAGCCAU-GCGAGU-CGC-GCGC-CC--------------------------------------------------------GG----------------------------------GGCG----------------------------------CC----------------------------------------------------------GG--GCG-CGG-C-GC--A-CGGCUCAGU---AAC-ACGUACC-CAA--CCU-AA-CCUCGGGA-GG--GGGAC-AA-CCCCG-G-GAAA-CUG-GGG-CUGAUCCC-C-CAU-A------G-GGG-A--A-GG--GCG------------------------------------------------------'..b'UGCUACAAUGG-CGGUGA--C-AAU-GGGA-U-GC-G-A-A-GGGG-CA-ACCC-U-----------UC-G-CAAA-UCU-CA---AAAAGCC-GUC-UCAGUUCGGA-UUGGGCUC-U-GCAACUC-GAGCCCA-UGAAGUUGGAAU-CGCUAGUA-AUCGU-GGA-UC-AG-CAC-GCCACGGUG-AAU-ACGUU-CCCGGGCCUUGUA----CACACCGCCCGUC-ACA--CCAUGGGA-GUUGGCUUUACCU-GAAG-ACGG-UGC-G-CU-AA-CCC---------------------------------GCA-A-------------------------------------GGGAGG-C-AGCCGG-CCACGGUAGGGUCAG-CG-ACU-GGGGUG-AAGUCGUAACAAGGUAGCCGUAGGGGAACCUG-CGGC-UGGAUCACCU---------------------------------------------------------------------------------------------\n+>CloTyrob Clostridium 
tyrobutyricum\n+-------------------------------UUUAAAUUGAGAGUUU-GAU-CCUGGCUCAGGACGAAC-GC-UGGC-GGC-G-UG--C-CUAACACAU-GCAAGU-CGA-GCGA-UG---AAA------------------------------------------------CCCC----------------------------------UUCG----------------------------------GGGG---------------------------------------------------UGG--AU--UAG-CGG-C-GG--A-CGGGUGAGU---AAC-ACGUGGG-UAA--CCU-GC-CUCAAAGU-GG--GGGAU-AG-CCUUC-C-GAAA-GGA-AGA-UUAAUACC-G-CAU-AA-----A-GCC-A--A-GU--UUC---------------------------------------------------------------------------------------------A-CAU------------------------------------------------------------------------------------GGA--A-U--U--UG-GAU-GAAA--GG-A---------------------GUAA-------------------U-UC-G--C----------UU----------UGAG---AUG-G-ACCCGCGG-CGC--A------UU-A-G-UU-A-G---UUGGUG-GG-G-UAAU-GG-CCUACCAA-GACA-GCG-AUGCGUAGCCGACCU-G-AG-------------A-GGGU-GAUC-GGCCACAUUGGAA-CUGA-GA-UACG-GUCCAGA-CUC-C-UACG--GGAG-GC-A-GC-A-G-UGGGG-AAUAUUGCA--------------------CAAU-GGGC-GAAA-GCCUGAUGCAGCAACGCCGCG-UGA-GU-GA-UGAA-GGUC--UU-CG-GA-UUGUAAA------------------------GCUCUGUC-UU-UUGG-GAC-G------A--------------------------U-AA------------------------U-GA-CGGU-A-C-CA--A-AGG---------AG-GAAGC-CAC-GGCUAAC-UACGU---GCCAGCAGCCG-CGGUAAU-AC-GUAG-GUGGCG-A-GCG-UUGUC-CGGAUUUACUGGGCGUAAAGGGUGCG-UAG-G-CGGAUG-UUUAAG-UGAG-AUGUG-AAA-UA-CCCGGG-CUU-AA-CUUGGG-UGC-U-------------------------------------------GC-A-U----------------------------------------------UUCA-AAC-UGGAU-AUC-U-A-G-AG-UG-----CAGGAG-AGGAG-AAU-GGAAUU-CCU--------------------AGU-GUA-GCG-GUGAAA-UG-C-GUAGAG-AUUAGGAAG-AAC-A-CCAG--U-G-GCGAAGGCGA--UUCUCUG------GACUG--------------------------------------------------------------UAA-CU--GACG-----CUGA-GG--CACGAAA-G-C-GUGGG-UAGC-AAACAGG-AUUAGAUAC-----CCUGGUAG-UCCAC-GCCGUAAACGAUG-AGUA-CUAGGUG-UAGGAGG-----UAUCGA-CC---------------------------------CCUUCUG-UGCC-GCA-GUAAACACAU-UAA-GUAC-UCCGCCUG-GGAAGUACGAUC-GCAA-GAUU-AAA-ACUC-AAA-GGAAUUG-ACGGGGG-CCC--GC-A--CA-A-GCA-GCGGAG-CA-UGUGGU-UUAAUUC-G-
AAGCAACG-CGAAG-AA-CCUUA-CCUGGAC-UUGACA-UC--------------CCCUG------A-AU-A-A-CCUA-GAGA-U-AG-G-CGAAG-C------CC-----------------------------------------UUCG----------------------------------------------------------------GG-----G---CA-GGG-A-GA--------------------------------------------------CAGGU-GGUGCAUGGUUGUCGUCA-GCUCGUGUC-GUGA-GAUGU-UAGG-UUAA-GU-CCUGCAAC-GAGCGCAACC-CUUA-UU--GUUAG---U-U-GC-U-AA-CA--U--------------------------------UCAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------UUGAG-CACU-C-U-AAC-GAGAC-U-GCCG-CG-G-U-UAA-CGC-G--GAGG-A-AGG-U-GGGG-AUGACGUC--AAAUCAUCAU-G-CC-CCUUAUG--UC-C-A-GGGCAACACACGUGCUACAAUGG-GCAGAA--C-AAA-GAGA-A-GC-A-A-U-ACCG-CG-AGGU-G-----------GA-G-CCAA-ACU-CA---AAAACUG-CUC-UCAGUUCGGA-UUGCAGGC-U-GAAACUC-GCCUGCA-UGAAGCUGGAGU-UGCUAGUA-AUCGC-GAA-UC-AG-CAU-GUCGCGGUG-AAU-ACGUU-CCCGGGCCUUGUA----CACACCGCCCGUC-ACA--CCAUGAGA-GCUGGCAACACCC-GAAG-UCCG-UAG-U-CU-AA-C-----------------------------------GUA-A---------------------------------------GAGG-A-CGCGGC-CGAAGGUGGGGUUAG-UG-AUU-GGGGUG-AAGUCGUAACAAGGUAGCCGUAGGAGAACCUG-CGGC-UGGAUCACCUCCUUUCU--------------------------------------------------------------------------------------\n'
b
diff -r 000000000000 -r a21965c8bcf1 test-data/sequence.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence.fasta Wed Oct 23 18:28:17 2019 -0400
b
b'@@ -0,0 +1,494 @@\n+>ENTD10\r\n+GGTTACCTTGTTACGACTTCACCCCAGTCATGAATCATACCGTGGTAAAC\r\n+GCCCTCCCGAAGGTTAAGCTATCTACTTCTGGTACAACCCACTCCCATGG\r\n+TGTGACGGGCGGTGTGTACAAGGCCCGGGAACGTATTCACCGCGACATTC\r\n+TGATTCGCGATTACTAGCGATTCCGACTTCATGGAGTCGAGTTGCAGACT\r\n+CCAATCCGGACTTAGACGCACTTTCTGAGATTCGCTCACTATCGCTAGCT\r\n+TGCTGCCCTCTGTATGCGCCATTGTAGCACGTGTGTAGCCCTACTCGTAA\r\n+GGGCCATGATGATTTGACGTCATCCCCACCTTCCTCCAGTTTATCACTGG\r\n+CAGTCTCCTTTGAGTTCTCAGCATTACCTGCTAGCAACAAAGGATAGGGG\r\n+TTGCGCTCGTTGCGGGACTTAACCCAACATTTCACAACACGAGCTGACGA\r\n+CAGCCATGCAGCACCTGTCTCATAGTTCCCGAAGGCACAGAAATGTCTCC\r\n+ATTTCCTTCTATGGATGTCAAGAGTAGGTAAGGTTCTTCGCGTTGCATCG\r\n+AATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCATTTG\r\n+AGTTTTAACCTTGCGGCCGTACTCCCCAGGCGGTCAATTTATCGCGGTAG\r\n+CTACGGGCGCCAGGTTCAAGACCCAACCCCCAAATTGACATCGTTTACAG\r\n+CGTGGACTACCAGGGTATCTAATCCTGTTTGCTACCCACGCTTTCGCACC\r\n+TCAGCGTCAGTCTCTCTCCAAGGGGCTGCCTTCGCCTTCGGTATTCCTCC\r\n+ACATCTCTACGCATTTCACCGCTACACGTGGAATTCTACCCCTCCCTAGA\r\n+GGACTCTAGCTGCCCAGTCTGAAATGCAATTCCCAAGTTAAGCTCGGGGC\r\n+TTTCACATCTCACTTAGACAACCGCCTGCGTGCCCTTTACGCCCAGTTAT\r\n+TCCGATTAACGCTCGCACCCTCCGTATTACCGCGGCTGCTGGCACGGAGT\r\n+TAGCCGGTGCTTCTTCTGCGACTAACGTCAATTGCATCACCTATTAGATA\r\n+ATGCACCTTCCTCATCGCTGAAAGAACTTTACAACCCGAAAGCCTTCTTC\r\n+ATTCACGCGGCATGGCTGCATCAGGGTTTCCCCCATTGTGCAATATTCCC\r\n+CACTGCTGCCTCCCGTAGGAGTCTGGACCGTGTCTCAGTTCCAGTGTGGC\r\n+TGGTCATCCTCTCAAACCAGCTAGAGATCGTAAGCTTGGTGAGCCTTTAC\r\n+CTCACCAACTACCTAATCTCACTTGGGCTCATCTCATGGCGCATGGTCCG\r\n+AAGATCCCATACTTTAGTCCGTAGACATTACGCGGTATTAGCTACAGTTT\r\n+CCCGTAGTTATCCCCCTCCATAAGGCAGATTCCCAAGCATTACTCACCCG\r\n+TCCGCCACTCGTCAGCAAGAAAGCAAGCTTCCTCCTGTTACCGTTCGACT\r\n+TGCATGTGTTAAGCCTGCCGCCAGCGTTCAATCTGAGCCATAATCAAACC\r\n+CTA\r\n+>ENTG06\r\n+GGGTTTGATCCTGGCTCAGGATGAACGCTAGCGATAGGCTTAACACATGC\r\n+AAGTCGAGGGGCAGCATGGTCTTAGCTTGCTAAGACTGATGGCGACCGGC\r\n+GCACGGGTGCGTAACGCGTATGCAACTTACCTTATAGAGGGGGATAGCTC\r\n+GTTTAACGACGAATTAATACCGCATATACAAGTAGCTCGGCATCGAGTAA\r\n+CTTGGAAATGAATTTCGCTATAAGATAGGCATGCGTCCCATTAGCTTGTT\r\n+GGTGAGGTAACGGCTCACCAA
GGCAACGATGGGTAGGGGAACTGAGAGGT\r\n+TTATCCCCCACACTGGTACTGAGACACGGACCAGACTCCTACGGGAGGCA\r\n+GCAGTGAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCAAATCGC\r\n+GTGAAGGATGACTGTCTTATGGATTGTAAACTTCTTTTATATGGGAATAA\r\n+TCACAGGTACGTGTACCTGTTTGAATGTACCATATGAATAAGCATCGGCT\r\n+AACTCCGTGCCGGCAGCCGCGGTAATACGGAGGATGCGAGCGTTATCCGG\r\n+ATTTATTGGGTTTAAAGGGTGCGTAGGTGGTCTGATAAGTCAGCGGTGAA\r\n+ATCTCCGAGCTCAACTCGGAAACTGCCGTTGAAACTGTTAGACTTGAGTG\r\n+CAGATGAGGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATC\r\n+ACGCAGAACTCCAATTGCGAAGGCAGCTTACTAAGGTGCAACTGACACTG\r\n+AAGCACGAAAGCGTGGGTATCAAACAGGATTAGATACCCTGGTAGTCCAC\r\n+GCTGTAAACGATGATTACTGGGNCGTATGCGATATACAGTATGCTCCTAA\r\n+GCGAAAGCGTTAAGTAATCCACCTGGGGAGTACGCCGGCAACGGTGAAAC\r\n+TCAAAGGAATTGACGGGGGCCCGCACAAGCGGAGGAACATGTGGTTTAAT\r\n+TCGATGATACGCGAGGAACCTTACCTGGGTTTAAACTTTAGCTGACCGCT\r\n+CGAGAGATCGAGTTTCCCTTCGGGGCAGCTATTTAGGTGCTGCATGGTTG\r\n+TCGTCAGCTCGTGCCGTGAGGTGTCGGCTTAAGTGCCATAACGAGCGCAA\r\n+CCCCTGTCTACGGTTACCATCAGGTTAAGCTGGGGACTCCGTAGAGACTG\r\n+CCGTCGTAAGGCGTGAGGAAGGTGGGGATGACGTCAAATCAGCACGGCCC\r\n+TTACATCCAGGGCGACACACGTGTTACAATGGTGAGGACAAAGGGACGCT\r\n+ACTAGGCGACTAGATGCAAATCTCAAAACCTCATCACAGTTCGGATCGGA\r\n+GTCTGCAACTCGACTCCGTGAAGCTGGATTCGCTAGTAATCGCGCATCAG\r\n+CCATGGCGCGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCAAG\r\n+CCATGGGAGTTCGGAGTACCTAAAGTCCGTACCTGCGAGGGTCGGCCTAG\r\n+GGTAATACGAGTGACTGGGGCTAAGTCGTAACAAGGTAACCA\r\n+>WH044-5C\r\n+AGGGTTTGATCCTGGCTCAGATTGAACGCTGGCGGCAGGCTTAACACATG\r\n+CAAGTCGAGCGATGAAGATGTAGCTTGCTACATTGGATTAGCGGCGGACG\r\n+GGTGAGTAATATTTAGGAATCTACCTAGTAGTGGGGGATAGCCCGAGGAA\r\n+ACTCGGATTAATACCGCATACGACCTACGGGAGAAAGGGGGCTTTTAGCT\r\n+CTCGCTATTAGATGAGCCTAAACCGGATTAGCTAGTTGGTGGGGTAAAGG\r\n+CCTACCAAGGCGACGATCTGTAGCTGGTCTGAGAGGATGATCAGCCACAC\r\n+TGGGACTGAGACACGGCCCAGACCCCTACGGGAGGCAGCAGTGGGGAATA\r\n+TTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAG\r\n+GCCTTTTGGTTGTAAAGCACTTTAAGCAGTGAGGAAGACTCTATGGTTAA\r\n+TACCCATAGACGATGACGTTAACTGCAGAATAAGCACCGGCTAACTCTGT\r\n+GCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTG\r\n+GGCGTAAAGCGAGCGTAGGTGGTTATAAAAGTCAG
ATGTGAAATCCCTGG\r\n+GCTCAACCTAGGAACTGCATCTGAAACTCTATAACTAGAGTAGGTGAGAA\r\n+GGGAGTAGAATTTCAGGTGTAGCGGTGAAATGCGTAGATATCTGAAGGAA\r\n+TACCGATGGCGAAAGGAGCTCCCTGGCA'..b'GCAGGTGGTTTGTTAAGTCAGATGTGAAAGCCCTGGGCTCAACCCGGGAAGGTCATTTGAAACTGG\r\n+CAAGCTAGAGTACTGTAGAGGGGGGTAGAATTTCAGGTGCAGCGGTGAAATGCGTAGAGATCTGAAGGAA\r\n+TACCGGTGGCGAAAGGCGGCCCCCTGGACAGATACTGACACTCAGATGCGAAAGCGTGGGGAGCAAACAG\r\n+GATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCTACTTGGAGGTTGTTCCCTTGAGGAGTGGCTT\r\n+TCGGAGCTAACGCGTTAAGTAGACCGCCTGGGGAGTACGGTCGCAAGATTAAAACTCAAATGAATTGACG\r\n+GGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTACTCTTGAC\r\n+ATCCAGAGAATTTTCCAGAGATGGATTAGTGCCTTCGGGAACTCTGAGACAGGTGCTGCATGGCTGTCGT\r\n+CAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTAATCCTTGATTGCCAGCACT\r\n+TCGGGTGGGAACTTCAGGGAGACTGCCGGTGATAAACCGGAGGAAGGTGGGGACGACGTCAAGTCATCAT\r\n+GGCCCTTACGAGTAGGGCTACACACGTGCTACAATGGCGTATACAGAGGGCGGCGAACTCGCGAGAGTAA\r\n+GCGAACCCCAAAAAGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGGAGTCGGAATCGCT\r\n+AGTAATCGTGGATCAGAATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATG\r\n+GGAGTGGGCTGCAAAAGAAGTAGGTAGTTTAACCTTCGGGGGGTCCCTTACCACTTTGTGGTTCATGACT\r\n+GGGGT\r\n+>FJ203641.1\r\n+GGATGAACGCTGGCGGTATGCTTAACACATGCAAGTTGAACGAAGGTTTTACCTTAGTGGCGGACGGGTG\r\n+AGTAATACGTGAGAATCTACCTTTAGGAGGGGAATAACAGCTGGAAACGACTGCTAATGCCCCATATGCT\r\n+TTATAGTGAAATGAATTTCGCCTGAAGATGAGCTCGCGCCTGATTAGCTTGTTGGTGGGGTAATGGCCTA\r\n+CCAAGGCAACGATCAGTAGCTGATTTGAGAGGATGATCAGCCACACTGGAACTGAGACACGGTCCAGACT\r\n+TCTACGGAAGGCAGCAGTGGGGAATTTTCCGCAATGGGCGCAAGCCTGACGGAGCAATACCGCGTGAGGG\r\n+ATGAATGCCTATGGGTTGTAAACCTCTTTTATTAGGGAAGAATTTTGACGGTACCTAATGAATAAGCATC\r\n+GGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGGGGATGCAAGCGTTATCCGGAATTATTGGGCGTAAA\r\n+GAGTCTGTAGGTTGCGCTATAAGTCTACTGTTAAATATTAGAGCTCAACTTTAAACAAGCAGTAGAAACT\r\n+ATTGTGCTTGAGTATGGTAGGGGTAAAGGGAATTCCCAGTGTAGCGGTGAAATGCGTAGATATTGGGAAG\r\n+AACACCAGAAGCGAAAGCGCTTTACTGGGCCATTGCTGACACTGAGGGACGAAAGCTAGGGGAGCGAATG\r\n+GGATTAGATACCCCAGTAGTCCTAGCCGTAAACGATGGATACTAGATGTTGTGCGTATCGATCCGTGCAG\r\n+TATCGTAGCTAACGC
GTTAAGTATCCCGCCTGGGAAGTACGCTCGCAAGAGTGAAACTCAAAGGAATTGA\r\n+CGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACACGAAGAACCTTACCAGAACTCG\r\n+ACATGTCGCAAATTTTTGTGAAAGCGAAAAGTGCCTTCGGGAATGCGAACACAGGTGGTGCATGGCTGTC\r\n+GTCAGCTCGTGTCGTGAGATGCTGGGTTAAGTCCCGCAACGAGCGCAACCCTTGTCTTTATTTGCCATCA\r\n+TTGAGTTGGGTACTTTAAAGAGACTGCCGGTGACAAACCGGAGGAAGGTGAGGATGACGTCAAGTCAGCA\r\n+TGCCCCTTACGTTCTGGGCTACACACGTGCTACAATGGTTATGACAAAAAGTTGCAAGTCTGTGAAGACA\r\n+AGCTAATCTATAAACATAGCCTCAGTTCGGATTGTAGGCTGAAACTCGCCTACATGAAGGTGGAATCGCT\r\n+AGTAATCGCCGGTCAGCTATACGGCGGTGAATCCGTTCCCGGGCCTTGTACACACCGCCCGTCACACCAT\r\n+GGGAGCTGGCCATGCCCAAAGCTGTTACCTTAACTTTATTTAAGAGAGGTTCATCTAAGGTGGGGCTAGT\r\n+GACTGGGGT\r\n+>FJ203629.1\r\n+ATGAACGCTAGCGGCAGGCCTCATACATGCAAGTCGAGGGGCAGCAGGACACTTCGGTGTTGCTGGCGAC\r\n+CGGCGCACGGGTGCGTAACACGTATGCAATCTGCCTTGCACTGGAGCATAGCCCTCGGAAACGAAGATTA\r\n+ATACTCCATACAGTCTTACTAGCGCATGCTAGTAAGATGAAAGCTCCGGCGGTACAAGATGAGCATGCAT\r\n+CCCATTAGCTTGTTGGTGAGATAACAGCTCACCAAGGCTACGATGGGTAGGGGTTCTGAGAGGAAGATCC\r\n+CCCACACTGGCACTGAGATACGGGCCAGACTCCTACGGGAGGCAGCAGTAGGGAATATTGGTCAATGGGC\r\n+GCAAGCCTGAACCAGCCATGCCGCGTGCAGGATGAAGGCCTTCAGGGTTGTAAACTGCTTTTGTATGGGA\r\n+ATAAAAAAGTCCTTGCGAGGACTCTTGAAGGTACCATAAGAATAAGCACCGGCTAACTCCGTGCCAGCAG\r\n+CCGCGGTAATACGGAGGGTGCAGGCGTTATCCGGATTTACTGGGTTTAAAGAGTGCGTAGGCGGCTTCTT\r\n+AAGTCAGTGGTGAAAGCTTAGCGCTTAACGCTAGAAGTGCCACTGATACTGGGAAGCTTGAGTCAAGAAG\r\n+AGGTAAGCAGAATTCATAGTGTAGCAGTGAAATGCTTAGATACTATGAGGAATACCAACAGCGAAGGCAG\r\n+CTTACTGGTCTTGTACTGACGTTGAGGCACGAAAGCGTGGGTAGCGAACAGGATTAGATACCCTGGTAGT\r\n+CCACGCCGTAAACGATGATCACTCGATGTACACGCTAGTCGTGTGTGTCTAAGCAAAAGCATTAAGTGAT\r\n+CCACCTGGGGAGTACGCTCGCAAGAGTGAAACTCAAAGGAATTGACGGGAGTCCGCACAAGCGGTGGAGC\r\n+ATGTGGTTTAATTCGATAATACGCGAGGAACCTTACCTGGGCTCGAATGTCTCTGCTACCTCGAGAAATC\r\n+GAGGGTTCCTTCGGGACAGAGTACAAGGTGCTGCATGGCTGTCGTCAGCTCGTGTCGTGAGATGTTGGGT\r\n+TAAGTCCTATAACGAGCGCAACCCCTTTACTTAGTTGCCAGCACATCATGGTGGGAACTCTAAGTAGACT\r\n+GCCTGCGTAAGTAGGAGGAAGGAGGGGACGAGGTCAAGTCATCATGGCCCTTACGCCCAGGGCTACACAC\r\n+GTGCTACAATGGCGTGTACAGAAGGACGCTACTTG
GTAACAAGCGGCAAATCCTCAAAGCACGTCTCAGT\r\n+TCGGATTGAGGTCTGCAACTCGACCTCATGAAGCTGGAATCGCTAGTAATCGCGCATCAGCAATGGCGCG\r\n+GTGAATACGTTCCCGGACTTTGTACACACCGCCCGTCAAGCCATGGGAGTTGGTAGGACCTAAAGACAGT\r\n+GGCCAATTAAGGAGCTGTTAAGGGTTAGATCAGCGACTGGGGCTA\r\n+\r\n+      \n\\ No newline at end of file\n'
b
diff -r 000000000000 -r a21965c8bcf1 test-data/test_sina_references.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_sina_references.loc Wed Oct 23 18:28:17 2019 -0400
b
@@ -0,0 +1,1 @@
+testarb testarb ${__HERE__}/reference.arb
\ No newline at end of file
b
diff -r 000000000000 -r a21965c8bcf1 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Oct 23 18:28:17 2019 -0400
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="sina_references" comment_char="#" allow_duplicate_entries="false">
+        <columns>value, name, path</columns>
+        <file path="tool-data/sina_references.loc.sample"/>
+    </table>
+</tables>
\ No newline at end of file
b
diff -r 000000000000 -r a21965c8bcf1 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Oct 23 18:28:17 2019 -0400
b
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tables>
+    <table name="sina_references" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/test_sina_references.loc" />
+    </table>
+</tables>