diff samtools_view.xml @ 2:02afb5ea2fce draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit 1047dc0e36a1c28492d7cd1e860c89ecf3ba30c8
author iuc
date Thu, 27 Sep 2018 08:51:16 -0400
parents 6ade12d94f9a
children fdc58a0cc1d4
line wrap: on
line diff
--- a/samtools_view.xml	Tue Sep 25 04:21:24 2018 -0400
+++ b/samtools_view.xml	Thu Sep 27 08:51:16 2018 -0400
@@ -3,7 +3,9 @@
     <macros>
     <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package">gawk</requirement>
+    </expand> <!-- awk is invoked when subsampling to a target read count -->
     <expand macro="stdio"/>
     <expand macro="version_command"/>
     <command><![CDATA[
@@ -11,7 +13,19 @@
         ## prepare reference data
         @PREPARE_FASTA_IDX@
         @PREPARE_IDX@
-
+        #if $cond_subsample.select_subsample == 'target':
+            ##done before the main command because the total # of reads is only known at execution time; awk emits "-s SEED.FRACTION", or nothing when the input is empty or already has no more reads than the target (%.15g keeps the fraction digits that the default %.6g conversion drops for multi-digit seeds). NOTE(review): rand() is never seeded with srand(), so gawk yields the same seed on every run when no seed is given - confirm whether that is intended
+            #if str($cond_subsample.seed) == '':
+                #set seed_expr="int(rand() * 32768)"
+            #else
+                #set seed_expr=$cond_subsample.seed
+            #end if
+            #if $input.is_of_type('sam'):
+                sample_fragment=`samtools view -c infile | awk '{s=\$1} END {frac=(s > 0 ? $cond_subsample.target/s : 1); if (frac < 1) printf("-s %.15g", $seed_expr+frac)}'` &&
+            #else
+                sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=(s > 0 ? $cond_subsample.target/s : 1); if (frac < 1) printf("-s %.15g", $seed_expr+frac)}'` &&
+            #end if
+        #end if
         ##call samtools view
         samtools view
         -@ \$addthreads
@@ -62,12 +76,17 @@
                 -x '${s.readtag}'
             #end for
         #end if
-        #if $cond_subsample.select_subsample == 'yes':
-            #set fraction=str($cond_subsample.subsample).split('.')[1]
-            #if str($cond_subsample.seed) == '':
-                -s "\${RANDOM}".$fraction
+        #if $cond_subsample.select_subsample != 'no':
+            #if $cond_subsample.select_subsample == "target":
+                ##calculated at execution time before the main samtools command; left unquoted so an empty value adds no argument and "-s N.F" splits into two words
+                \${sample_fragment}
             #else
-                -s $cond_subsample.seed.$fraction
+                #set fraction=str($cond_subsample.fraction).split('.')[1]
+                #if str($cond_subsample.seed) == '':
+                    -s "\${RANDOM}".$fraction
+                #else
+                    -s $cond_subsample.seed.$fraction
+                #end if
             #end if
         #end if
 
@@ -181,12 +200,17 @@
         </conditional>
         <conditional name="cond_subsample">
             <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
-                <option value="yes">Yes</option>
-                <option value="no" selected="True">No</option>
+                <option value="fraction">Specify a fraction to keep</option>
+                <option value="target">Specify a target # of reads</option>
+                <option value="no" selected="True">No downsampling</option>
             </param>
-            <when value="yes">
-                <param name="subsample" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
-                <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+            <when value="fraction">
+                <param name="fraction" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample."/>
+                <expand macro="seed_input"/>
+            </when>
+            <when value="target">
+                <param name="target" type="integer" optional="False" value="" min="0" label="Target # of reads" help="sets the target number of reads to subsample."/>
+                <expand macro="seed_input"/>
             </when>
             <when value="no"/>
         </conditional>
@@ -383,6 +407,46 @@
             </conditional>
             <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
         </test>
+        <!-- subsampling: target of 2 reads from SAM (no seed, then seed 7) and from BAM (seed 7), then fraction .2 from BAM (seed 7) -->
+        <test>
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
+        </test>
+        <test>
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="4" />
+        </test>
+        <test>
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
+        <test>
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="fraction" />
+                <param name="seed" value="7" />
+                <param name="fraction" value=".2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
     </tests>
     <help>
 **What it does**