changeset 2:7ad3484aa5db draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit ea1ce99d707fcf2d64f6f2cd96fdd1754f344df1"
author artbio
date Thu, 30 Dec 2021 02:06:08 +0000
parents 6195f15d4541
children de833cc76a8e
files sambamba.xml test-data/c1215_fixmate_filtered.sam test-data/c1215_fixmate_region-filtered.sam test-data/c1215_fixmate_region-sliced.bam test-data/c1215_fixmate_sampled.sam test-data/ex1_header_filtered.bam test-data/ex1_header_sampled.bam
diffstat 7 files changed, 52 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/sambamba.xml	Mon May 25 17:10:17 2020 -0400
+++ b/sambamba.xml	Thu Dec 30 02:06:08 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.6">
+<tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.7.1+galaxy0">
     <description>
         on flags, fields, and tags using Sambamba
     </description>
@@ -12,30 +12,38 @@
     <command detect_errors="exit_code"><![CDATA[
         ln -s $input input.bam &&
         ln -s $input.metadata.bam_index input.bai &&
+
         #if $sambamba_options.selector == 'filter'
             sambamba view -h -t \${GALAXY_SLOTS:-4}
             #if $sambamba_options.query != '':
                 --filter='$sambamba_options.query'
             #end if
-                -f '$format' -o $outfile input.bam $sambamba_options.region
-         #else
-             sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format'
-             --subsampling-seed='$sambamba_options.seed'
-             -s '$sambamba_options.fraction' -o '$outfile' input.bam
-         #end if
+                -f '$sambamba_options.format'
+                -o $outfile input.bam $sambamba_options.region
+
+        #else if $sambamba_options.selector == 'sample'
+            sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$sambamba_options.format'
+                 --subsampling-seed='$sambamba_options.seed'
+                 -s '$sambamba_options.fraction'
+                 -o '$outfile' input.bam
+
+        #else
+            sambamba slice -o '$outfile' input.bam $sambamba_options.region
+        #end if
     ]]></command>
     <inputs>
-        <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
-        <param name="format" type="select" label="format of the tool output">
-            <option value="bam">BAM</option>
-            <option value="sam">SAM</option>
-        </param>
+        <param name="input" type="data" format="bam" label="BAM or SAM input file(s)"/>
         <conditional name="sambamba_options">
-            <param name="selector" type="select" label="Filter or Down-sample alignments">
+            <param name="selector" type="select" label="Filter, Down-sample or slice alignments">
                 <option value="sample">Down-sample bam or sam alignments</option>
                 <option value="filter" selected="true">Filter bam or sam alignements</option>
+                <option value="slice">Slice bam or sam alignments by chromosome or chromosome region</option>
             </param>
             <when value="filter">
+                <param name="format" type="select" label="format of the tool output">
+                    <option value="bam">BAM</option>
+                    <option value="sam">SAM</option>
+                </param>
                 <param name="query" type="text" size="80">
                     <sanitizer invalid_char="X">
                         <valid initial="string.ascii_letters,string.digits, string.punctuation">
@@ -54,7 +62,24 @@
                     </help>
                 </param>
             </when>
+
+            <when value="slice">
+                <param name="region" type="text" size="40" label="Region in format chr:beg-end">
+                    <help>
+                    Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
+                    (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
+                    (region between 1,000,000 and 2,000,000bp including the end points).
+                    The coordinates are 1-based. NOTE that sambamba slice only outputs
+                    BAM-format datasets.
+                    </help>
+                </param>
+            </when>
+
             <when value="sample">
+                <param name="format" type="select" label="format of the tool output">
+                    <option value="bam">BAM</option>
+                    <option value="sam">SAM</option>
+                </param>
                 <param name="seed" type="integer" value="123" size="10">
                     <label>Seed value for randomisation</label>
                     <help>
@@ -72,9 +97,9 @@
         </conditional>
     </inputs>
     <outputs>
-        <data name="outfile" format="bam">
+        <data name="outfile" format="bam" label="Filter, slice or sample on ${on_string}">
             <change_format>
-                <when input="format" value="sam" format="sam" />
+                <when input="sambamba_options['format']" value="sam" format="sam" />
             </change_format>
         </data>
     </outputs>
@@ -85,7 +110,7 @@
             <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" />
             <param name="format" value="bam" />
             <param name="region" value="" />
-            <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" />
+            <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" lines_diff="2"/>
         </test>
         <test>
             <param name="input" value="c1215_fixmate.bam" ftype="bam" />
@@ -103,13 +128,20 @@
             <param name="region" value="AL096846:1000-5000" />
             <output name="outfile" file="c1215_fixmate_region-filtered.sam" ftype="sam" lines_diff="2"/>
         </test>
+        <test>
+            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
+            <param name="selector" value="slice" />
+            <param name="query" value='' />
+            <param name="region" value="AL096846:1000-2000 AL096846:4000-5000" />
+            <output name="outfile" file="c1215_fixmate_region-sliced.bam" ftype="bam"/>
+        </test>
          <test>
             <param name="input" value="ex1_header.sam" ftype="sam" />
             <param name="selector" value="sample" />
             <param name="seed" value="123" />
             <param name="fraction" value="0.1" />
             <param name="format" value="bam" />
-            <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" />
+            <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" lines_diff="2"/>
         </test>
         <test>
             <param name="input" value="c1215_fixmate.bam" ftype="bam" />
--- a/test-data/c1215_fixmate_filtered.sam	Mon May 25 17:10:17 2020 -0400
+++ b/test-data/c1215_fixmate_filtered.sam	Thu Dec 30 02:06:08 2021 +0000
@@ -1,5 +1,5 @@
 @HD	VN:1.3	SO:coordinate
-@PG	ID:sambamba	CL:view -h -t 1 --filter=[MD] =~ /^\d+T\d+A\d+/ and first_of_pair -f sam -o /tmp/tmpzrv_byjx/files/a/d/c/dataset_adced56a-6b20-49ec-ae55-b745ffd1b703.dat input.bam AL096846:1000-5000	VN:1.0
+@PG	ID:sambamba	CL:view -h -t 1 --filter=[MD] =~ /^\d+T\d+A\d+/ and first_of_pair -f sam -o /tmp/tmp1mtgiw55/files/f/d/4/dataset_fd46ac7b-c6fd-430d-b562-c610caa9f3ac.dat input.bam AL096846:1000-5000	VN:1.0
 AL096846_1043_1176_1df2	83	AL096846	1142	60	35M	=	1043	-134	AAGGCACCTACTTTGACATTCGCGTCTCTCTTAGC	<2<<).<<4<5<<<<;<<<<<<77<;<<<<<<<<<	XT:A:U	NM:i:2	X0:i:1	X1:i:0	XM:i:2	XO:i:0	XG:i:0	MD:Z:4T8A21
 AL096846_1080_1249_1915	83	AL096846	1215	60	35M	=	1080	-170	CTGACTTGTCGACGGATTTGACTAAGAAAATTTTA	<<<:5:;77;<<&&<<<<<<<<<<<<<<<<<<<<<	XT:A:U	NM:i:2	X0:i:1	X1:i:0	XM:i:2	XO:i:0	XG:i:0	MD:Z:2T9A22
 AL096846_1123_1300_24a3	83	AL096846	1268	37	2S33M	=	1123	-178	TGCCGTTATCGCTTAGCAGATTCTAGTCAATTGCC	:<:5<:;&..<7;<<<<<<<<<<<;<<<<<<<<<<	XT:A:M	NM:i:2	XM:i:2	XO:i:0	XG:i:0	MD:Z:5T8A18
--- a/test-data/c1215_fixmate_region-filtered.sam	Mon May 25 17:10:17 2020 -0400
+++ b/test-data/c1215_fixmate_region-filtered.sam	Thu Dec 30 02:06:08 2021 +0000
@@ -1,5 +1,5 @@
 @HD	VN:1.3	SO:coordinate
-@PG	ID:sambamba	CL:view -h -t 1 -f sam -o /tmp/tmpfvgji5f3/files/2/1/d/dataset_21dea8eb-5b79-402e-9a05-26eaacdbcb22.dat input.bam AL096846:1000-5000	VN:1.0
+@PG	ID:sambamba	CL:view -h -t 1 -f sam -o /tmp/tmp1mtgiw55/files/7/e/8/dataset_7e81fe9d-8d41-427b-b591-ac666118bdc0.dat input.bam AL096846:1000-5000	VN:1.0
 AL096846_966_1145_3fe	163	AL096846	966	60	35M	=	1111	180	TAGTTTCATACTAATAAAGACGATTGTTTACTTTA	<<<<<<99<<;<<<<<<<<<<<;<<<<<<<:<<::	XT:A:U	NM:i:0	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:35
 AL096846_966_1131_e80	163	AL096846	966	60	35M	=	1097	166	TAGTTTCATACTAATAAAGACGATTGTTTACTTTA	<<<<<;<<<<<<<<<<3<<<<<<<<<;7;<;::5.	XT:A:U	NM:i:0	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:35
 AL096846_861_1000_1bf6	83	AL096846	966	60	35M	=	861	-140	TATTTTCATACTAATAAAGACGATTGTTTACTTTA	84&:&<<<<<<:7<<<<<<<<<<<<<<<<<<<<<<	XT:A:U	NM:i:1	X0:i:1	X1:i:0	XM:i:1	XO:i:0	XG:i:0	MD:Z:2G32
Binary file test-data/c1215_fixmate_region-sliced.bam has changed
--- a/test-data/c1215_fixmate_sampled.sam	Mon May 25 17:10:17 2020 -0400
+++ b/test-data/c1215_fixmate_sampled.sam	Thu Dec 30 02:06:08 2021 +0000
@@ -1,5 +1,5 @@
 @HD	VN:1.3	SO:coordinate
-@PG	ID:sambamba	CL:view -h -t 1 -f sam --subsampling-seed=123 -s 0.1 -o /tmp/tmpsodu2dip/files/2/b/e/dataset_2be4d6d2-9c36-480e-a69b-61d0f4ba66d5.dat input.bam	VN:1.0
+@PG	ID:sambamba	CL:view -h -t 1 -f sam --subsampling-seed=123 -s 0.1 -o /tmp/tmp1mtgiw55/files/1/2/e/dataset_12e22434-7749-4511-b122-b93c9487e848.dat input.bam	VN:1.0
 AL096846_2_151_2180	99	AL096846	2	60	35M	=	117	150	ATATATAATTTAATAAATACATTCCGACGATACTG	<<<<<<<<<<<<<<<:(<<<<<<<<<<:<<:5<;;	XT:A:U	NM:i:0	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:35
 AL096846_11_168_117e	163	AL096846	11	60	35M	=	134	158	TTAATAAATACATTCCGACGATACTGCCTCTATGG	<<<<<<<<<<<<<<,2<<<<<<<<<<7<<<:<<<6	XT:A:U	NM:i:0	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:35
 AL096846_15_201_212	163	AL096846	15	60	35M	=	167	187	TAAATACATTCCGACGATACTGCCTCTATGGCTTA	8:<<<<<<<<<<;<<<;;<<<<<<<994<);<<29	XT:A:U	NM:i:0	X0:i:1	X1:i:0	XM:i:0	XO:i:0	XG:i:0	MD:Z:35
Binary file test-data/ex1_header_filtered.bam has changed
Binary file test-data/ex1_header_sampled.bam has changed