changeset 2:02afb5ea2fce draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_view commit 1047dc0e36a1c28492d7cd1e860c89ecf3ba30c8
author iuc
date Thu, 27 Sep 2018 08:51:16 -0400
parents 6ade12d94f9a
children b91e0f76afe9
files macros.xml samtools_view.xml test-data/test_ds.bam test-data/test_ds.cram test-data/test_ds.sam
diffstat 5 files changed, 87 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Tue Sep 25 04:21:24 2018 -0400
+++ b/macros.xml	Thu Sep 27 08:51:16 2018 -0400
@@ -74,6 +74,9 @@
         addmemory=\${GALAXY_MEMORY_MB_PER_SLOT:-768} && 
         ((addmemory=addmemory*75/100)) &&
     ]]></token>
+    <xml name="seed_input">
+        <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+    </xml>
     <xml name="flag_options">
         <option value="1">read is paired</option>
         <option value="2">read is mapped in a proper pair</option>
--- a/samtools_view.xml	Tue Sep 25 04:21:24 2018 -0400
+++ b/samtools_view.xml	Thu Sep 27 08:51:16 2018 -0400
@@ -3,7 +3,9 @@
     <macros>
     <import>macros.xml</import>
     </macros>
-    <expand macro="requirements"/>
+    <expand macro="requirements">
+        <requirement type="package">gawk</requirement>
+    </expand>
     <expand macro="stdio"/>
     <expand macro="version_command"/>
     <command><![CDATA[
@@ -11,7 +13,19 @@
         ## prepare reference data
         @PREPARE_FASTA_IDX@
         @PREPARE_IDX@
-
+        #if $cond_subsample.select_subsample == 'target':
+            ##the -s argument is built before the main command because the total # reads is only known at execution time; srand() makes the default seed vary between runs, s > 0 guards empty inputs, and the seed and fraction are formatted separately so no fraction digits are lost
+            #if str($cond_subsample.seed) == '':
+                #set seed_expr="int(rand() * 32768)"
+            #else
+                #set seed_expr=$cond_subsample.seed
+            #end if
+            #if $input.is_of_type('sam'):
+                sample_fragment=`samtools view -c infile | awk 'BEGIN {srand()} {s=\$1} END {frac=(s > 0 ? $cond_subsample.target/s : 1); print(frac < 1 ? sprintf("-s %d%s", $seed_expr, substr(sprintf("%.10f", frac), 2)) : "")}'` &&
+            #else
+                sample_fragment=`samtools idxstats infile | awk 'BEGIN {srand()} {s+=\$4+\$3} END {frac=(s > 0 ? $cond_subsample.target/s : 1); print(frac < 1 ? sprintf("-s %d%s", $seed_expr, substr(sprintf("%.10f", frac), 2)) : "")}'` &&
+            #end if
+        #end if
         ##call samtools view
         samtools view
         -@ \$addthreads
@@ -62,12 +76,17 @@
                 -x '${s.readtag}'
             #end for
         #end if
-        #if $cond_subsample.select_subsample == 'yes':
-            #set fraction=str($cond_subsample.subsample).split('.')[1]
-            #if str($cond_subsample.seed) == '':
-                -s "\${RANDOM}".$fraction
+        #if $cond_subsample.select_subsample != 'no':
+            #if $cond_subsample.select_subsample == "target":
+                ##calculated at execution time before the main samtools command; left unquoted so an empty value adds no argument at all
+                \${sample_fragment}
             #else
-                -s $cond_subsample.seed.$fraction
+                #set fraction=('%.10f' % float(str($cond_subsample.fraction))).split('.')[1]
+                #if str($cond_subsample.seed) == '':
+                    -s "\${RANDOM}".$fraction
+                #else
+                    -s $cond_subsample.seed.$fraction
+                #end if
             #end if
         #end if
 
@@ -181,12 +200,17 @@
         </conditional>
         <conditional name="cond_subsample">
             <param name="select_subsample" type="select" argument="-s" label="Subsample alignment">
-                <option value="yes">Yes</option>
-                <option value="no" selected="True">No</option>
+                <option value="fraction">Specify a fraction to keep</option>
+                <option value="target">Specify a target # of reads</option>
+                <option value="no" selected="True">No downsampling</option>
             </param>
-            <when value="yes">
-                <param name="subsample" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
-                <param name="seed" type="integer" optional="True" label="Seed for random number generator" help="If empty a random seed is used." />
+            <when value="fraction">
+                <param name="fraction" type="float" optional="False" value="0.5" min="0" max="1" label="Subsampling fraction" help="sets the fraction of templates/pairs to subsample." />
+                <expand macro="seed_input" />
+            </when>
+            <when value="target">
+                <param name="target" type="integer" optional="False" value="" min="0" label="Target # of reads" help="sets the target number of reads to subsample." />
+                <expand macro="seed_input" />
             </when>
             <when value="no"/>
         </conditional>
@@ -383,6 +407,46 @@
             </conditional>
             <output name="outputsam" file="sam_to_bam_out2.bam" ftype="bam" />
         </test>
+        <!-- sampling options-->
+        <test>
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="6" />
+        </test>
+        <test>
+            <param name="input" value="test.sam" ftype="sam" />
+            <param name="outtype" value="sam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.sam" ftype="sam" compare="diff" lines_diff="4" />
+        </test>
+        <test>
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="target" />
+                <param name="seed" value="7" />
+                <param name="target" value="2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
+        <test>
+            <param name="input" value="test.bam" ftype="bam" />
+            <param name="outtype" value="bam" />
+            <conditional name="cond_subsample">
+                <param name="select_subsample" value="fraction" />
+                <param name="seed" value="7" />
+                <param name="fraction" value=".2" />
+            </conditional>
+            <output name="outputsam" file="test_ds.bam" ftype="bam" />
+        </test>        
     </tests>
     <help>
 **What it does**
Binary file test-data/test_ds.bam has changed
Binary file test-data/test_ds.cram has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_ds.sam	Thu Sep 27 08:51:16 2018 -0400
@@ -0,0 +1,8 @@
+@HD	VN:1.4	SO:unsorted
+@SQ	SN:CHROMOSOME_I	LN:100
+@RG	ID:UNKNOWN	SM:UNKNOWN
+@PG	ID:bowtie2	PN:bowtie2	VN:2.0.0-beta5
+@PG	ID:0	CL:aaaaa/aaa/aaaaa/aaaaaa/aaaaaaaaa/aaa/iuc/package_aaaaaaaaa_x_y/aaaaaaaaaaaa/bin/aaaaaaaaaaaaaaaaa aaaaaaaaaa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.cram aa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.fa -O test	PN:samtools	VN:1.2
+SRR065390.14978392	16	CHROMOSOME_I	2	1	27M1D73M	*	0	0	CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC	RG:Z:UNKNOWN	XG:i:1	XM:i:5	XN:i:0	XO:i:1	AS:i:-18	XS:i:-18	YT:Z:UU
+SRR065390.921023	16	CHROMOSOME_I	3	12	100M	*	0	0	CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA	###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000	RG:Z:UNKNOWN	XG:i:0	XM:i:3	XN:i:0	XO:i:0	AS:i:-6	XS:i:-13	YT:Z:UU
+SRR065390.6023338	0	CHROMOSOME_I	3	1	100M	*	0	0	CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAGCTAC	CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCDDDBCCABB=DABBA?################	RG:Z:UNKNOWN	XG:i:0	XM:i:3	XN:i:0	XO:i:0	AS:i:-6	XS:i:-6	YT:Z:UU