Mercurial > repos > devteam > samtools_sort

--- a/macros.xml	Sat Apr 06 06:37:47 2019 -0400
+++ b/macros.xml	Tue Sep 28 16:16:52 2021 +0000
@@ -5,10 +5,16 @@
             <yield/>
         </requirements>
     </xml>
-    <token name="@TOOL_VERSION@">1.9</token>
-    <token name="@FLAGS@">#set $flags = sum(map(int, str($filter).split(',')))</token>
+    <token name="@TOOL_VERSION@">1.13</token>
+    <token name="@PROFILE@">20.05</token>
+    <token name="@FLAGS@"><![CDATA[
+        #set $flags = 0
+        #if $filter
+            #set $flags = sum(map(int, str($filter).split(',')))
+        #end if
+    ]]></token>
     <token name="@PREPARE_IDX@"><![CDATA[
-        ##prepare input and indices
+        ##prepare input and indices
         ln -s '$input' infile &&
         #if $input.is_of_type('bam'):
             #if str( $input.metadata.bam_index ) != "None":
@@ -25,7 +31,7 @@
         #end if
     ]]></token>
     <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[
-        ##prepare input and indices
+        ##prepare input and indices
         #for $i, $bam in enumerate( $input_bams ):
             ln -s '$bam' '${i}' &&
             #if $bam.is_of_type('bam'):
@@ -63,6 +69,51 @@
             #set reffai=None
         #end if
     ]]></token>
+    <!-- Optional reference selector (none / FASTA from the history / built-in genome). token_help and token_argument are declared with empty defaults so the @HELP@ and @ARGUMENT@ placeholders below are always substituted, even when the expanding tool passes neither. -->
+    <xml name="optional_reference" token_help="" token_argument="">
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="no">No</option>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="no"/>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Mandatory reference selector (FASTA from the history / built-in genome); same layout as optional_reference minus the "no" choice. The no_options validator is a direct child of the param, matching optional_reference. -->
+    <xml name="mandatory_reference" token_help="" token_argument="">
+        <conditional name="addref_cond">
+            <param name="addref_select" type="select" label="Use a reference sequence">
+                <help>@HELP@</help>
+                <option value="history">Use a genome/index from the history</option>
+                <option value="cached">Use a built-in genome</option>
+            </param>
+            <when value="history">
+                <param name="ref" argument="@ARGUMENT@" type="data" format="fasta,fasta.gz" label="Reference"/>
+            </when>
+            <when value="cached">
+                <param name="ref" argument="@ARGUMENT@" type="select" label="Reference">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input" key="dbkey" column="dbkey"/>
+                    </options>
+                    <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset"/>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+
     <token name="@ADDTHREADS@"><![CDATA[
         ##compute the number of ADDITIONAL threads to be used by samtools (-@)
         addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
@@ -70,28 +121,28 @@
     <token name="@ADDMEMORY@"><![CDATA[
         ##compute the number of memory available to samtools sort (-m)
         ##use only 75% of available: https://github.com/samtools/samtools/issues/831
-        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
+        addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} &&
         ((addmemory=addmemory*75/100)) &&
     ]]></token>
     <xml name="seed_input">
-       <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+       <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
     </xml>
-    <xml name="flag_options">
-        <option value="1">read is paired</option>
-        <option value="2">read is mapped in a proper pair</option>
-        <option value="4">read is unmapped</option>
-        <option value="8">mate is unmapped</option>
-        <option value="16">read reverse strand</option>
-        <option value="32">mate reverse strand</option>
-        <option value="64">read is the first in a pair</option>
-        <option value="128">read is the second in a pair</option>
-        <option value="256">alignment or read is not primary</option>
-        <option value="512">read fails platform/vendor quality checks</option>
-        <option value="1024">read is a PCR or optical duplicate</option>
-        <option value="2048">supplementary alignment</option>
+    <xml name="flag_options" token_s1="false" token_s2="false" token_s4="false" token_s8="false" token_s16="false" token_s32="false" token_s64="false" token_s128="false" token_s256="false" token_s512="false" token_s1024="false" token_s2048="false">
+        <option value="1" selected="@S1@">Read is paired</option>
+        <option value="2" selected="@S2@">Read is mapped in a proper pair</option>
+        <option value="4" selected="@S4@">Read is unmapped</option>
+        <option value="8" selected="@S8@">Mate is unmapped</option>
+        <option value="16" selected="@S16@">Read is mapped to the reverse strand of the reference</option>
+        <option value="32" selected="@S32@">Mate is mapped to the reverse strand of the reference</option>
+        <option value="64" selected="@S64@">Read is the first in a pair</option>
+        <option value="128" selected="@S128@">Read is the second in a pair</option>
+        <option value="256" selected="@S256@">Alignment of the read is not primary</option>
+        <option value="512" selected="@S512@">Read fails platform/vendor quality checks</option>
+        <option value="1024" selected="@S1024@">Read is a PCR or optical duplicate</option>
+        <option value="2048" selected="@S2048@">Alignment is supplementary</option>
     </xml>

-    <!-- region specification macros and tokens for tools that allow the specification
+    <!-- region specification macros and tokens for tools that allow the specification
          of region by bed file / space separated list of regions -->
     <token name="@REGIONS_FILE@"><![CDATA[
         #if $cond_region.select_region == 'tab':
--- a/samtools_sort.xml	Sat Apr 06 06:37:47 2019 -0400
+++ b/samtools_sort.xml	Tue Sep 28 16:16:52 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="samtools_sort" name="Samtools sort" profile="18.01" version="2.0.3">
+<tool id="samtools_sort" name="Samtools sort" version="2.0.4" profile="@PROFILE@">
     <description>order of storing aligned sequences</description>
     <macros>
         <import>macros.xml</import>
@@ -20,8 +20,9 @@
                 $prim_key_cond.tag
                 $prim_key_cond.sec_key_select
             #end if
+            $minhash
             -O bam
-            -T sorttmp
+            -T "\${TMPDIR:-.}"
             '${input1}'
              > '${output1}'
     ]]></command>
@@ -43,6 +44,7 @@
                 </param>
            </when>
         </conditional>
+        <param name="minhash" type="boolean" argument="-M" truevalue="-M" falsevalue="" checked="false" label="Minhash collation" help="Use minimiser for clustering unaligned/unplaced reads."/>
         <!--<param name="compression" type="integer" argument="-l" optional="True" min="0" max="9" label="compression level" help="0 (uncompressed) to 9 (best)"/>-->
     </inputs>
     <outputs>
@@ -56,35 +58,32 @@
     </outputs>
     <tests>
         <!-- tests from https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl#L2464 -->
-        <!-- # Pos sort -->
+        <!-- 1) # Pos sort -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
-            <output name="output1" file="pos.sort.expected.bam" ftype="bam"/>
+            <output name="output1" file="pos.sort.expected.bam" ftype="bam" lines_diff="4" />
         </test>
     	<!-- test_cmd($opts, out=>"sort/pos.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads}  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -"); -->
-
-        <!-- # Name sort -->
+        <!-- 2) # Name sort -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
             <conditional name="prim_key_cond">
                 <param name="prim_key_select" value="-n"/>
             </conditional>
-            <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam"/>
+            <output name="output1" file="name.sort.expected.bam" ftype="qname_sorted.bam" lines_diff="4"/>
         </test>
-	<!--    test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
-
-        <!-- # Tag sort (RG) (considers output and name sorted) -->
+	    <!-- test_cmd($opts, out=>"sort/name.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 3) # Tag sort (RG) (considers output and name sorted) -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
             <conditional name="prim_key_cond">
                 <param name="prim_key_select" value="-t"/>
                 <param name="tag" value="RG"/>
             </conditional>
-            <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam"/>
+            <output name="output1" file="tag.rg.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
         </test>
-    <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
-
-        <!-- # Tag sort (RG); secondary by name -->
+        <!--test_cmd($opts, out=>"sort/tag.rg.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 4) # Tag sort (RG); secondary by name -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
             <conditional name="prim_key_cond">
@@ -92,11 +91,10 @@
                 <param name="tag" value="RG"/>
                 <param name="sec_key_select" value="-n"/>
             </conditional>
-            <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam"/>
+            <output name="output1" file="tag.rg.n.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
         </test>
-    <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
-
-        <!-- # Tag sort (AS) -->
+        <!--test_cmd($opts, out=>"sort/tag.rg.n.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -n -t RG  $$opts{path}/dat/test_input_1_a.bam -O SAM -o -");-->
+        <!-- 5) # Tag sort (AS) -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
             <conditional name="prim_key_cond">
@@ -104,40 +102,44 @@
                 <param name="tag" value="AS"/>
                 <param name="sec_key_select" value=""/>
             </conditional>
-            <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam"/>
+            <output name="output1" file="tag.as.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
         </test>
-    <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
-
-        <!-- # Tag sort (FI) -->
+        <!--test_cmd($opts, out=>"sort/tag.as.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t AS $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
+        <!-- 6) # Tag sort (FI) -->
         <test>
             <param name="input1" value="test_input_1_a.bam" ftype="bam" />
             <conditional name="prim_key_cond">
                 <param name="prim_key_select" value="-t"/>
                 <param name="tag" value="FI"/>
             </conditional>
-            <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam"/>
+            <output name="output1" file="tag.fi.sort.expected.bam" ftype="unsorted.bam" lines_diff="4"/>
         </test>
-    <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
-
-        <!-- tests from old version -->
+        <!--test_cmd($opts, out=>"sort/tag.fi.sort.expected.sam", cmd=>"$$opts{bin}/samtools sort${threads} -t FI $$opts{path}/dat/test_input_1_d.sam -O SAM -o -");-->
+        <!-- 7) and 8) tests from old version -->
         <test>
             <param name="input1" value="1.bam" ftype="bam" />
-            <output name="output1" file="1_sort.bam" ftype="bam" sort="True"/>
+            <output name="output1" file="1_sort.bam" ftype="bam" sort="True" lines_diff="4"/>
         </test>
         <test>
             <param name="input1" value="1.bam" ftype="bam" />
             <conditional name="prim_key_cond">
                 <param name="prim_key_select" value="-n"/>
             </conditional>
-            <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam"/>
+            <output name="output1" file="1_sort_read_names.bam" ftype="qname_sorted.bam" lines_diff="4"/>
+        </test>
+        <!-- 9) test minhash sorting -->
+        <test>
+            <param name="input1" value="test_input_1_a.bam" ftype="bam" />
+            <param name="minhash" value="true" />
+            <output name="output1" file="minhash.expected.bam" ftype="bam" lines_diff="4" />
         </test>
     </tests>
     <help>
 **What it does**

-Sort alignments by leftmost coordinates, or by read name when -n is used.
-An appropriate @HD-SO sort order header tag will be added or an existing
-one updated if necessary.
+Sort alignments by leftmost coordinates, or by read name when -n is used.
+An appropriate @HD-SO sort order header tag will be added or an existing
+one updated if necessary.

 **Ordering Rules**

@@ -156,13 +158,15 @@
 - String tags (types H and Z) are compared based on the binary contents of the
   tag using the C strcmp(3) function.
 - Character tags (type A) are compared by binary character value.
-- No attempt is made to compare tags of other types — notably type B array values will not be compared.
+- No attempt is made to compare tags of other types — notably type B array values will not be compared.

 When the -n option is present, records are sorted by name. Names are compared so as to give a “natural” ordering — i.e. sections consisting of digits are compared numerically while all other sections are compared based on their binary representation. This means “a1” will come before “b1” and “a9” will come before “a10”. Records with the same name will be ordered according to the values of the READ1 and READ2 flags (see flags).

 When the -n option is not present, reads are sorted by reference (according to the order of the @SQ header records), then by position in the reference, and then by the REVERSE flag.

-This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.
+This has now been removed. The previous out.prefix argument (and -f option, if any) should be changed to an appropriate combination of -T PREFIX and -o FILE. The previous -o option should be removed, as output defaults to standard output.
+
+When the -M (minhash collation) option is present, samtools sort groups unmapped reads with similar sequences together. This can sometimes significantly reduce the file size.

     </help>
     <expand macro="citations"/>
Binary file test-data/1_sort.bam has changed
Binary file test-data/1_sort_read_names.bam has changed
Binary file test-data/minhash.expected.bam has changed
Binary file test-data/name.sort.expected.bam has changed
Binary file test-data/pos.sort.expected.bam has changed
Binary file test-data/tag.as.sort.expected.bam has changed
Binary file test-data/tag.fi.sort.expected.bam has changed
Binary file test-data/tag.rg.n.sort.expected.bam has changed
Binary file test-data/tag.rg.sort.expected.bam has changed