changeset 3:8afa8799d50f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sina commit 8f89414ecee8120447febc2902ff1ddbbd71e638"
author iuc
date Fri, 02 Oct 2020 08:01:21 +0000
parents 49b937f12aa4
children 482052b30b78
files README.md README.rst macros.xml sina.xml test-data/output_fasta_arb.arb test-data/output_fasta_fasta.arb
diffstat 6 files changed, 88 insertions(+), 84 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Wed Dec 25 12:09:28 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-**Add reference libraries**
-
-[Reference databases](https://www.arb-silva.de/download/arb-files/) provided by the [SILVA project](https://www.arb-silva.de/) can be used within this wrapper by adding the corresponding files to the tool-data directory and editing` tool-data/sina_references.loc.sample` as follows: 
-
-    LSU_Parc	${__HERE__}/SILVA_132_LSUParc_12_12_17_opt.arb
-    LSU_Ref	${__HERE__}/SILVA_132_LSURef_07_12_17_opt.arb
-    SSU_Ref	${__HERE__}/SILVA_132_SSURef_12_12_17_opt.arb
-    Ref_NR_99	${__HERE__}/SILVA_132_SSURef_NR99_13_12_17_opt.arb
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Fri Oct 02 08:01:21 2020 +0000
@@ -0,0 +1,11 @@
+**Add reference libraries**
+
+`Reference databases <https://www.arb-silva.de/download/arb-files/>`_ provided by the `SILVA project <https://www.arb-silva.de/>`_ can be used within this wrapper by adding the corresponding files to the tool-data directory and editing ``tool-data/sina_references.loc.sample`` as follows::
+
+    LSU_Parc	${__HERE__}/SILVA_132_LSUParc_12_12_17_opt.arb
+
+    LSU_Ref	${__HERE__}/SILVA_132_LSURef_07_12_17_opt.arb
+
+    SSU_Ref	${__HERE__}/SILVA_132_SSURef_12_12_17_opt.arb
+
+    Ref_NR_99	${__HERE__}/SILVA_132_SSURef_NR99_13_12_17_opt.arb
\ No newline at end of file
--- a/macros.xml	Wed Dec 25 12:09:28 2019 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-<macros>
-    <token name="@TOOL_VERSION@">1.5.0</token>
-    <token name="@WRAPPER_VERSION@">galaxy1</token>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">sina</requirement>
-            <yield/>
-        </requirements>
-    </xml>
-</macros>
--- a/sina.xml	Wed Dec 25 12:09:28 2019 -0500
+++ b/sina.xml	Fri Oct 02 08:01:21 2020 +0000
@@ -1,51 +1,54 @@
 <tool id="sina" name="SINA" version="@TOOL_VERSION@+@WRAPPER_VERSION@" profile="18.01">
     <description>reference based multiple sequence alignment</description>
     <macros>
-        <import>macros.xml</import>
+        <token name="@TOOL_VERSION@">1.6.1</token>
+        <token name="@WRAPPER_VERSION@">galaxy0</token>
+        <xml name="minidty">
+            <param name="minidty" type="float" value="0.7" min="0.0" max="1.0" label="Exclude sequences sharing less than this value fractional identity with any of the alignment reference sequences from the output?" help="(--min-idty)"/>
+        </xml>
     </macros>
-    <expand macro="requirements"/>
-    <stdio></stdio>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">sina</requirement>
+    </requirements>
+    <version_command>sina --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
         ## initialize
         ## parse custom reference from fasta to arb file format
-
         #if $db.select == 'custom'
             #if $db.custom.ext == 'fasta'
-                sina -i '$db.custom' --prealigned -o reference.arb &&
+                sina -i '$db.custom' --prealigned -o 'reference.arb' &&
             #else
-                ln -s '$db.custom' reference.arb &&
+                ln -s '$db.custom' 'reference.arb' &&
             #end if
         #elif $db.select == 'local'
-            ln -s '$db.arb_databases.fields.path' reference.arb &&
+            ln -s '$db.arb_databases.fields.path' 'reference.arb' &&
         #end if
 
         ## run
         sina
             --in='$in'
-            --db=reference.arb
+            --db='reference.arb'
             -p \${GALAXY_SLOTS:-4}
             --num-pts=\${GALAXY_SLOTS:-4}
-
             #if $os.type == 'arb'
                 --outtype='arb'
-                --out=output.arb
+                --out='output.arb'
             #elif $os.type == 'fasta'
                 --outtype='fasta'
-                --out=output.fasta
+                --out='output.fasta'
                 --min-idty=$os.minidty
             #elif $os.type == 'fasta_meta'
                 --outtype='fasta'
-                --out=output.fasta
+                --out='output.fasta'
                 --min-idty=$os.minidty
                 --meta-fmt='csv'
             #end if
             #if $log == 'yes'
-                --log-file=$logfile
+                --log-file='$logfile'
             #elif $log == 'yes_meta'
-                --log-file=$logfile
+                --log-file='$logfile'
                 --show-diff
             #end if
-
             ## Advanced alignment parameters
             --overhang=$ap.overhang
             --insertion=$ap.insertion
@@ -54,7 +57,6 @@
             #elif $ap.turn == 'all'
                 -t all
             #end if
-
             ## Expert Alignment Parameters
             --fs-min=$eap.fsmin
             --fs-max=$eap.fsmax
@@ -74,7 +76,6 @@
             --fs-kmer-mm=$eap.fskmermm
             $eap.fskmernofast
             $eap.fskmernorel
-
             ## Advanced search and classification parameters
             #if $asacp.activate == 'yes'
                 --search
@@ -86,7 +87,6 @@
                 $asacp.searchnofast
                 $asacp.searchkmernorel
             #end if
-
             ## convert meta file to tabular
             #if $os.type == 'fasta_meta'
                 && cat output.csv | sed 's/,/\t/g' > output.tsv
@@ -94,17 +94,17 @@
     ]]></command>
     <!-- Sections and default parameters are based on https://www.arb-silva.de/aligner -->
     <inputs>
-        <param argument="--in" type="data" format="fasta" multiple="false" label="Sequence file" help="FASTA file format"/>
+        <param argument="--in" type="data" format="fasta" label="Select sequence file" help="FASTA file format"/>
         <conditional name="db">
-            <param name="select" type="select" label="Reference library type" help="">
+            <param name="select" type="select" label="Select reference library type">
                 <option value="custom" selected="true">Custom</option>
                 <option value="local">Local cached</option>
             </param>
             <when value="custom">
-                <param name="custom" type="data" format="data" label="Reference library file" help="FASTA or ARB file format (--db)"/>
+                <param name="custom" type="data" format="data" label="Select reference library file" help="FASTA or ARB file format (--db)"/>
             </when>
             <when value="local">
-                <param name="arb_databases" type="select" label="Local cached libraries" help="">
+                <param name="arb_databases" type="select" label="Select local cached library">
                     <options from_data_table="sina_references">
                         <validator message="No database is available" type="no_options"/>
                     </options>
@@ -112,77 +112,77 @@
             </when>
         </conditional>
         <conditional name="asacp">
-            <param name="activate" type="select" label="Enable search stage" help="(--search)">
+            <param name="activate" type="select" label="Enable search stage?" help="(--search)">
                 <option value="no">No</option>
                 <option value="yes">Yes</option>
             </param>
             <when value="no"></when>
             <when value="yes">
-                <param name="searchkmercandidates" type="integer" value="1000" label="Set number of candidate reference sequences retrieved from the k-mer based search." help="For each candidate, the MSA based similarity is calculated and the search result based on these numbers. A value for n one or two orders larger than --search-max-result is usually quite sufficient. (--search-kmer-candidates)"/>
-                <param name="lcaquorum" type="float" value="0.8" min="0.0" max="1.0" label="Fraction of the search result that must share the same classification" help="Using the default parameters --search-max-result=10 and --lca-quorum=0.7, this means that the deepest classification shared by 7 out of the top 10 search results is chosen for the query sequence. (--lca-quorum)"/>
-                <param name="searchkmerlen" type="integer" value="10" min="0" label="Set k for the kmer based candidate search." help="See --fs-kmer-len. (--search-kmer-len)"/>
-                <param name="searchkmermm" type="integer" value="0" min="0" label="Set the number of allowed mismatches within each kmer." help="See --fs-kmer-mm.  (--search-kmer-mm)"/>
-                <param name="searchnofast" type="boolean" truevalue="--search-no-fast" falsevalue="" checked="true" label="Skip fast family search." help="This option configures the same behavior for the search stage. See --fs-kmer-no-fast. (--search-no-fast)"/>
-                <param name="searchkmernorel" type="boolean" truevalue="--search-kmer-norel" falsevalue="" checked="false" label="Configures the candidate search to use absolute rather than length-relative scores for ordering the results." help="See --fs-kmer-norel. ( --search-kmer-norel)"/>
-                <param name="searchmaxresult" type="integer" value="10" min="0" label="The maximum number of search results to return for each query sequence." help="(--search-max-result)"/>
+                <param name="searchkmercandidates" type="integer" value="1000" label="Set number of candidate reference sequences retrieved from the k-mer based search" help="For each candidate, the MSA based similarity is calculated and the search result is based on these numbers. A value for n one or two orders larger than --search-max-result is usually quite sufficient. (--search-kmer-candidates)"/>
+                <param name="lcaquorum" type="float" value="0.8" min="0.0" max="1.0" label="Set fraction of the search result that must share the same classification" help="Using the default parameters --search-max-result=10 and --lca-quorum=0.7, this means that the deepest classification shared by 7 out of the top 10 search results is chosen for the query sequence. (--lca-quorum)"/>
+                <param name="searchkmerlen" type="integer" value="10" min="0" label="Set k for the kmer based candidate search" help="See --fs-kmer-len. (--search-kmer-len)"/>
+                <param name="searchkmermm" type="integer" value="0" min="0" label="Set number of allowed mismatches within each kmer" help="See --fs-kmer-mm.  (--search-kmer-mm)"/>
+                <param name="searchnofast" type="boolean" truevalue="--search-no-fast" falsevalue="" checked="true" label="Skip fast family search?" help="This option configures the same behavior for the search stage. See --fs-kmer-no-fast. (--search-no-fast)"/>
+                <param name="searchkmernorel" type="boolean" truevalue="--search-kmer-norel" falsevalue="" checked="false" label="Configure the candidate search to use absolute rather than length-relative scores for ordering the results?" help="See --fs-kmer-norel. (--search-kmer-norel)"/>
+                <param name="searchmaxresult" type="integer" value="10" min="0" label="Set maximum number of search results to return for each query sequence" help="(--search-max-result)"/>
             </when>
         </conditional>
         <conditional name="os">
-            <param name="type" type="select" label="Output file type" help="(--outtype)">
+            <param name="type" type="select" label="Select output file type" help="(--outtype)">
                 <option value="fasta">FASTA</option>
                 <option value="fasta_meta">FASTA with meta-file</option>
                 <option value="arb">ARB</option>
             </param>
             <when value="fasta">
-                <param name="minidty" type="float" value="0.7" min="0.0" max="1.0" label="Exclude sequences sharing less than this value fractional identity with any of the alignment reference sequences from the output." help="(--min-idty)"/>
+                <expand macro="minidty"/>
             </when>
             <when value="fasta_meta">
-                <param name="minidty" type="float" value="0.7" min="0.0" max="1.0" label="Exclude sequences sharing less than this value fractional identity with any of the alignment reference sequences from the output." help="(--min-idty)"/>
+                <expand macro="minidty"/>
             </when>
             <when value="arb"/>
         </conditional>
-        <param name="log" type="select" label="Create log file" help="(--log-file, --show-diff)">
+        <param name="log" type="select" label="Create log file?" help="(--log-file, --show-diff)">
             <option value="no">no</option>
             <option value="yes">yes</option>
             <option value="yes_meta">yes with differences between the inferred alignment and the original alignment</option>
         </param>
         <section name="ap" title="Alignment parameters">
-            <param argument="--overhang" type="select" label="Bases remaining unaligned at the ends should be" help="">
-                <option value="attach">attached to the last aligned base.</option>
-                <option value="edge">moved to the edge of the alignment.</option>
+            <param argument="--overhang" type="select" label="Bases remaining unaligned at the ends should be">
+                <option value="attach">attached to the last aligned base</option>
+                <option value="edge">moved to the edge of the alignment</option>
                 <!-- maybe broken, see https://sina.readthedocs.io/en/latest/commandline.html#alignment-options -->
-                <option value="remove">removed.</option>
+                <option value="remove">removed</option>
             </param>
             <param argument="--insertion" type="select" label="Insertions wider than available columns should be">
-                <option value="forbid">forbidden during alignment.</option>
-                <option value="shift">accomodated by pushing out surrounding bases.</option>
-                <option value="remove">removed.</option>
+                <option value="forbid">forbidden during alignment</option>
+                <option value="shift">accommodated by pushing out surrounding bases</option>
+                <option value="remove">removed</option>
             </param>
-            <param argument="--turn" type="select" label="Enable turn check stage" help="Sequences not oriented in accordance with the reference database will be reverse complemented as needed. If all is specified, sequences will also be tested for only reversal or only complemented (this should only be necessary if your data was mishandled).">
+            <param argument="--turn" type="select" label="Enable turn check stage?" help="Sequences not oriented in accordance with the reference database will be reverse complemented as needed. If all is specified, sequences will also be tested for only reversal or only complementation (this should only be necessary if your data was mishandled).">
                 <option value="yes">Yes</option>
                 <option value="no">No</option>
                 <option value="all">All</option>
             </param>
         </section>
         <section name="eap" title="Expert alignment parameters">
-            <param name="fsmin" type="integer" value="15" min="0" label="Minimum number of reference sequences used for each query" help="(--fs-min)"/>
-            <param name="fsmax" type="integer" value="40" min="0" label="Maximum number of reference sequences used for each query" help="(--fs-max)"/>
-            <param name="fsmsc" type="float" value="0.7" min="0.0" max="1.0" label="Minimum similarity reference sequences" help="Required to have with the query sequence. This affects the range between --fs-min and --fs-max(--fs-msc)"/>
-            <param name="fsweight" type="integer" value="1" min="0" label="Adjust the weight factor for the frequency at which a node was observed in the reference alignment." help="Use 0 to disable weighting. This feature prefers the more common placement for bases with inconsistent alignment in the reference database. (--fs-weight)"/>
-            <param name="fsreq" type="integer" value="1" min="0" label="Minimum number of reference sequences that must be found in order to attempt alignment" help="If fewer sequences than indicated here are found, the respective query sequence will be discarded. (--fs-req)"/>
-            <param name="fsreqfull" type="integer" value="1" min="0" label="Minimum number of full length reference sequences that must be included in the selected reference set" help="The search will proceed regardless of other settings until this setting has been satisfied. If it cannot be satisfied by any sequence in the reference database, the query sequence will be discarded. This setting exists to ensure that the entire length of the query sequence will be covered in the presence of partial sequences contained within your reference database. Note: If you are working with sequences other than 16S, you need to adjust this value or the value of --fs-full-len accordingly. In particular when working with short reference sequences, this setting may prevent any acceptable reference sequences from being found, leading to no sequences being aligned. (--fs-req-full)"/>
-            <param name="fsfulllen" type="integer" value="1400" min="0" label="Minimum length a sequence is required to have to be considered full length" help="(--fs-full-len)"/>
-            <param name="genestart" type="integer" value="0" min="0" label="Sets the beginning of the gene within the reference alignment." help="See --fs-cover-gene. (--gene-start)"/>
-            <param name="geneend" type="integer" value="0" min="0" label="Sets the end of the gene within the reference alignment." help="See --fs-cover-gene. (--gene-end)"/>
-            <param name="fscovergene" type="integer" value="0" label="Require total of n sequences to cover each the beginning and the end of the gene within the alignment." help="Similar to --fs-req-full.  This option is more precise than --fs-req-full, but requires that the column numbers for the range in which the full gene is expected be specified via --gene-start and --gene-end. (--fs-cover-gene)"/>
-            <param name="matchscore" type="integer" value="2" min="0" label="Score given for a match" help="(--match-score)"/>
-            <param name="mismatchscore" type="integer" value="-1" max="0" label="Score given for a mismatch" help="(--mismatch-score)"/>
-            <param name="pengap" type="integer" value="5" min="0" label="Set the penalty subtracted from the score for opening a gap." help="(--pen-gap)"/>
-            <param name="pengapext" type="integer" value="2" min="0" label="Set the penalty subtracted from the score for extending a gap." help="(--pen-gapext)"/>
-            <param name="fskmerlen" type="integer" value="10" min="0" label="Size of k for the reference search" help="For SSU rRNA sequences, the default of 10 is a good value. For different sequence types, different values may perform better. For 5S, for example, 6 has shown to be more effective. (--fs-kmer-len)"/>
-            <param name="fskmermm" type="integer" value="0" min="0" label="Allow k-mer matches in the reference database to contain this number of mismatches." help=" (--fs-kmer-mm)"/>
-            <param name="fskmernofast" type="boolean" truevalue="--fs-kmer-no-fast" falsevalue="" checked="false" label="Use all k-mers occurring in the query sequence in the search." help="By default, only k-mers starting with an A are used for extra performance. (--fs-kmer-no-fast)"/>
-            <param name="fskmernorel" type="boolean" truevalue="--fs-kmer-norel" falsevalue="" checked="false" label="Use absolute match scores in the kmer search." help="Absolute (number of shared k-mers) rather than relative (number or shared k-mers divided by length of reference sequence) (--fs-kmer-norel)"/>
+            <param name="fsmin" type="integer" value="15" min="0" label="Set minimum number of reference sequences used for each query" help="(--fs-min)"/>
+            <param name="fsmax" type="integer" value="40" min="0" label="Set maximum number of reference sequences used for each query" help="(--fs-max)"/>
+            <param name="fsmsc" type="float" value="0.7" min="0.0" max="1.0" label="Set minimum similarity reference sequences are required to have with the query sequence" help="This affects the range between --fs-min and --fs-max. (--fs-msc)"/>
+            <param name="fsweight" type="integer" value="1" min="0" label="Set weight factor for the frequency at which a node was observed in the reference alignment" help="Use 0 to disable weighting. This feature prefers the more common placement for bases with inconsistent alignment in the reference database. (--fs-weight)"/>
+            <param name="fsreq" type="integer" value="1" min="0" label="Set minimum number of reference sequences that must be found in order to attempt alignment" help="If fewer sequences than indicated here are found, the respective query sequence will be discarded. (--fs-req)"/>
+            <param name="fsreqfull" type="integer" value="1" min="0" label="Set minimum number of full length reference sequences that must be included in the selected reference set" help="The search will proceed regardless of other settings until this setting has been satisfied. If it cannot be satisfied by any sequence in the reference database, the query sequence will be discarded. This setting exists to ensure that the entire length of the query sequence will be covered in the presence of partial sequences contained within your reference database. Note: If you are working with sequences other than 16S, you need to adjust this value or the value of --fs-full-len accordingly. In particular when working with short reference sequences, this setting may prevent any acceptable reference sequences from being found, leading to no sequences being aligned. (--fs-req-full)"/>
+            <param name="fsfulllen" type="integer" value="1400" min="0" label="Set minimum length a sequence is required to have to be considered full length" help="(--fs-full-len)"/>
+            <param name="genestart" type="integer" value="0" min="0" label="Set beginning of the gene within the reference alignment" help="See --fs-cover-gene. (--gene-start)"/>
+            <param name="geneend" type="integer" value="0" min="0" label="Set end of the gene within the reference alignment" help="See --fs-cover-gene. (--gene-end)"/>
+            <param name="fscovergene" type="integer" value="0" label="Set total of n sequences to cover each the beginning and the end of the gene within the alignment" help="Similar to --fs-req-full.  This option is more precise than --fs-req-full, but requires that the column numbers for the range in which the full gene is expected be specified via --gene-start and --gene-end. (--fs-cover-gene)"/>
+            <param name="matchscore" type="integer" value="2" min="0" label="Set score given for a match" help="(--match-score)"/>
+            <param name="mismatchscore" type="integer" value="-1" max="0" label="Set score given for a mismatch" help="(--mismatch-score)"/>
+            <param name="pengap" type="integer" value="5" min="0" label="Set penalty subtracted from the score for opening a gap" help="(--pen-gap)"/>
+            <param name="pengapext" type="integer" value="2" min="0" label="Set penalty subtracted from the score for extending a gap" help="(--pen-gapext)"/>
+            <param name="fskmerlen" type="integer" value="10" min="0" label="Set size of k for the reference search" help="For SSU rRNA sequences, the default of 10 is a good value. For different sequence types, different values may perform better. For 5S, for example, 6 has shown to be more effective. (--fs-kmer-len)"/>
+            <param name="fskmermm" type="integer" value="0" min="0" label="Set number of mismatches allowed within k-mer matches in the reference database" help="(--fs-kmer-mm)"/>
+            <param name="fskmernofast" type="boolean" truevalue="--fs-kmer-no-fast" falsevalue="" label="Use all k-mers occurring in the query sequence in the search?" help="By default, only k-mers starting with an A are used for extra performance. (--fs-kmer-no-fast)"/>
+            <param name="fskmernorel" type="boolean" truevalue="--fs-kmer-norel" falsevalue="" label="Use absolute match scores in the kmer search?" help="Absolute (number of shared k-mers) rather than relative (number of shared k-mers divided by length of reference sequence) (--fs-kmer-norel)"/>
         </section>
     </inputs>
     <outputs>
@@ -234,7 +234,11 @@
             <section name="os">
                 <param name="type" value="arb"/>
             </section>
-            <output name="output_arb" file="output_fasta_fasta.arb" compare="sim_size" delta="100"/>
+            <output name="output_arb">
+                <assert_contents>
+                    <has_size value="21369"/>
+                </assert_contents>
+            </output>
         </test>
         <!-- #3 in: *.fasta; db: *.arb; out: *.fasta; standard parameters -->
         <test>
@@ -269,7 +273,11 @@
             <section name="os">
                 <param name="type" value="arb"/>
             </section>
-            <output name="output_arb" file="output_fasta_arb.arb" compare="sim_size" delta="1000"/>
+            <output name="output_arb">
+                <assert_contents>
+                    <has_size value="19999"/>
+                </assert_contents>
+            </output>
         </test>
         <!-- #5 in: *.fasta; db: *.arb; out: *.fasta; custom parameters -->
         <test>
@@ -333,10 +341,13 @@
             <section name="os">
                 <param name="type" value="arb"/>
             </section>
-            <output name="output_arb" file="output_fasta_arb.arb" compare="sim_size" delta="1000"/>
+            <output name="output_arb">
+                <assert_contents>
+                    <has_size value="19999"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
-
     <help><![CDATA[
 .. class:: infomark
 
@@ -350,7 +361,7 @@
 
 **Input**
 
-SINA requires sequences in FASTA file format, whereas libraries can be also provided as ARB files. Furthermore, reference databases can be added as data tables. See README.md for more information.
+SINA requires sequences in FASTA file format, whereas libraries can also be provided as ARB files. Furthermore, reference databases can be added as data tables. See README.rst for more information.
 
 **Output**
 
@@ -365,4 +376,4 @@
     <citations>
         <citation type="doi">10.1093/bioinformatics/bts252</citation>
     </citations>
-</tool>
+</tool>
\ No newline at end of file
Binary file test-data/output_fasta_arb.arb has changed
Binary file test-data/output_fasta_fasta.arb has changed