Repository 'samtools_view'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/samtools_view

Changeset 2:02afb5ea2fce (2018-09-27)
Previous changeset 1:6ade12d94f9a (2018-09-25) Next changeset 3:b91e0f76afe9 (2018-10-14)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit 1047dc0e36a1c28492d7cd1e860c89ecf3ba30c8
modified:
macros.xml
samtools_view.xml
added:
test-data/test_ds.bam
test-data/test_ds.cram
test-data/test_ds.sam
b
diff -r 6ade12d94f9a -r 02afb5ea2fce macros.xml
--- a/macros.xml Tue Sep 25 04:21:24 2018 -0400
+++ b/macros.xml Thu Sep 27 08:51:16 2018 -0400
b
@@ -74,6 +74,9 @@
         addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && 
         ((addmemory=addmemory*75/100)) &&
     ]]></token>
+    <xml name="seed_input"> <!-- reusable optional integer parameter "seed", expanded in both subsampling modes -->
+       <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." /> 
+    </xml>
     <xml name="flag_options">
         <option value="1">read is paired</option>
         <option value="2">read is mapped in a proper pair</option>
b
diff -r 6ade12d94f9a -r 02afb5ea2fce samtools_view.xml
--- a/samtools_view.xml Tue Sep 25 04:21:24 2018 -0400
+++ b/samtools_view.xml Thu Sep 27 08:51:16 2018 -0400
[
@@ -3,7 +3,9 @@
     <macros>
     <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package">gawk</requirement> <!-- the command section pipes read counts through awk -->
+    </expand>
     <expand macro="stdio"/>
     <expand macro="version_command"/>
     <command><![CDATA[
@@ -11,7 +13,19 @@
         ## prepare reference data
         @PREPARE_FASTA_IDX@
         @PREPARE_IDX@
-
+        #if $cond_subsample.select_subsample == 'target':
+            ##must run before the main command: the read total is only known at execution time; the -s value is printed with fixed precision so awk's default %.6g conversion cannot drop the fraction, and an input with zero reads yields no -s option
+            #if str($cond_subsample.seed) == '':
+                #set seed_expr="int(rand() * 32768)"
+            #else
+                #set seed_expr=$cond_subsample.seed
+            #end if
+            #if $input.is_of_type('sam'):
+                sample_fragment=`samtools view -c infile | awk '{s=\$1} END {frac=(s > 0 ? $cond_subsample.target/s : 1); print(frac < 1 ? sprintf("-s %.10f", $seed_expr+frac) : "")}'` &&
+            #else
+                sample_fragment=`samtools idxstats infile | awk '{s+=\$4+\$3} END {frac=(s > 0 ? $cond_subsample.target/s : 1); print(frac < 1 ? sprintf("-s %.10f", $seed_expr+frac) : "")}'` &&
+            #end if
+        #end if
         ##call samtools view
         samtools view
         -@ \$addthreads
@@ -62,12 +76,17 @@
                 -x '${s.readtag}'
             #end for
         #end if
-        #if $cond_subsample.select_subsample == 'yes':
-            #set fraction=str($cond_subsample.subsample).split('.')[1]
-            #if str($cond_subsample.seed) == '':
-                -s "\${RANDOM}".$fraction
+        #if $cond_subsample.select_subsample != 'no':
+            #if $cond_subsample.select_subsample == "target":
+                ##set at execution time before the main samtools command; left unquoted so an empty value adds no argument and a non-empty one splits into the option and its value
+                \${sample_fragment}
             #else
-                -s $cond_subsample.seed.$fraction
+                #set fraction=str($cond_subsample.fraction).split('.')[1]
+                #if  str($cond_subsample.seed) == '':
+                    -s "\${RANDOM}".$fraction
+                #else
+                    -s $cond_subsample.seed.$fraction
+                #end if
             #end if
         #end if
 
@@ -181,12 +200,17 @@
         </conditional>
         <conditional name="cond_subsample">
             <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
-                <option value="yes">Yes</option>
-                <option value="no" selected="True">No</option>
+                <option value="fraction">Specify a fraction to keep</option>
+                <option value="target">Specify a target # of reads</option>
+                <option value="no" selected="True">No downsampling</option>
             </param>
-            <when value="yes">
-                <param name="subsample" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
-                <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+            <when value="fraction">
+                <param name="fraction" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
+                <expand macro="seed_input" />
+            </when>
+            <when value="target">
+                <param name="target" type="integer" optional="False" value="" min="0" label="Target # of reads" help="sets the target number of reads to subsample." />
+                <expand macro="seed_input" />
             </when>
             <when value="no"/>
         </conditional>
@@ -383,6 +407,46 @@
             </conditional>
             <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
         </test>
+        <!-- subsampling options: target read count (SAM and BAM input) and fixed fraction -->
+        <test> <!-- SAM input, target of 2 reads, no seed given; compared with lines_diff="6" -->
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
+        </test>
+        <test> <!-- SAM input, target of 2 reads, seed 7 -->
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="4" />
+        </test>
+        <test> <!-- BAM input, target of 2 reads, seed 7 -->
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
+        <test> <!-- BAM input, fraction .2, seed 7; expects the same test_ds.bam as the BAM target test -->
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="fraction" />
+                <param name="seed" value="7" />
+                <param name="fraction" value=".2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
     </tests>
     <help>
 **What it does**
b
diff -r 6ade12d94f9a -r 02afb5ea2fce test-data/test_ds.bam
b
Binary file test-data/test_ds.bam has changed
b
diff -r 6ade12d94f9a -r 02afb5ea2fce test-data/test_ds.cram
b
Binary file test-data/test_ds.cram has changed
b
diff -r 6ade12d94f9a -r 02afb5ea2fce test-data/test_ds.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_ds.sam Thu Sep 27 08:51:16 2018 -0400
b
@@ -0,0 +1,8 @@
+@HD VN:1.4 SO:unsorted
+@SQ SN:CHROMOSOME_I LN:100
+@RG ID:UNKNOWN SM:UNKNOWN
+@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5
+@PG ID:0 CL:aaaaa/aaa/aaaaa/aaaaaa/aaaaaaaaa/aaa/iuc/package_aaaaaaaaa_x_y/aaaaaaaaaaaa/bin/aaaaaaaaaaaaaaaaa aaaaaaaaaa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.cram aa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.fa -O test PN:samtools VN:1.2
+SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU
+SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 RG:Z:UNKNOWN XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU
+SRR065390.6023338 0 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAGCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCDDDBCCABB=DABBA?################ RG:Z:UNKNOWN XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-6 YT:Z:UU