diff artbio_bam_cleaning.xml @ 7:745f529127b8 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/artbio_bam_cleaning commit b782130b62b7c74911774b58c7a965a99dee1519"
author artbio
date Mon, 20 Dec 2021 19:44:29 +0000
parents 999c2b871f36
children b12e50bcddd2
line wrap: on
line diff
--- a/artbio_bam_cleaning.xml	Wed Apr 07 01:31:51 2021 +0000
+++ b/artbio_bam_cleaning.xml	Mon Dec 20 19:44:29 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="artbio_bam_cleaning" name="ARTbio bam cleaning" version="1.6+galaxy6">
+<tool id="artbio_bam_cleaning" name="ARTbio bam cleaning" version="1.7+galaxy0">
     <description>
         on flags and PCR Duplicates and MD recalibration
     </description>
@@ -6,9 +6,9 @@
         <import>macro.xml</import>
     </macros>
     <requirements>
-        <requirement type="package" version="1.6">samtools</requirement>
-        <requirement type="package" version="0.7.1">sambamba</requirement>
-        <requirement type="package" version="1.3.2">freebayes</requirement>
+        <requirement type="package" version="1.6=hb116620_7">samtools</requirement>
+        <requirement type="package" version="0.8.1=h41abebc_0">sambamba</requirement>
+        <requirement type="package" version="1.3.5=py39hba5d119_3">freebayes</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" description="Error occured" />
@@ -19,15 +19,12 @@
     #set input_base = 'input'   
     ln -f -s $input_bam.metadata.bam_index input.bam.bai &&
     ln -s $input_bam input.bam &&
-    sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
-    #if $skip_rmdup == 'no':
-        | samtools rmdup -s - - | tee $input_base".filt1.dedup.bam"
-    #end if
+    sambamba view -h -t \${GALAXY_SLOTS:-2} --filter="mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped) and not(duplicate)" -f "bam" ${input_base}".bam"
     | bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
     | samtools calmd  -C 50 -b -@ \${GALAXY_SLOTS:-2} - reference.fa
-    #if $skip_laststep == 'yes':
+    #if $filter_MQ_255 == 'no':
         > $calmd
-    #else if $skip_laststep == 'no':
+    #else if $filter_MQ_255 == 'yes':
         | tee $calmd
         | sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality <= 254' -f 'bam' /dev/stdin > $fullfilter
     #end if
@@ -35,62 +32,69 @@
     <inputs>
         <expand macro="reference_source_conditional" />
         <param name="input_bam" type="data" format="bam" label="BAM or SAM file to process"/>
-        <param name="skip_rmdup" type="select" label="skip remove pcr duplicate step ?" display="radio"
-               help="useful if duplicates are already marked by other tools">
-            <option value="no" selected="true">No</option>
-            <option value="yes">Yes</option>
-        </param>
-        <param name="skip_laststep" type="select" label="skip last samtool view filter ?" display="radio"
-               help="Only generate the calMD output">
-            <option value="no" selected="true">No</option>
-            <option value="yes">Yes</option>
+        <param name="filter_MQ_255" type="select" label="Discard alignments with mapping quality &gt; 254"
+               display="radio"
+               help="If `No`, generates the calMD output without discarding aberrant MQs
+                     generated by the step. Useful if you need to keep split reads that
+                     would be eliminated if `Yes`">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
         </param>
     </inputs>
     <outputs>
         <data name="calmd" format="bam" label="CalMD filter (for lumpy-smoove)" />
         <data name="fullfilter" format="bam" label="Full filtering (for somatic-varscan)">
-            <filter>skip_laststep == "no"</filter>
+            <filter>filter_MQ_255 == "yes"</filter>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
+            <param name="input_bam" value="chr22_sample.bam" ftype="bam" />
             <param name="reference_source_selector" value="history" />
-            <param name="ref_file" value="chr21.fa" />
+            <param name="ref_file" value="chr22.fa" />
             <output name="calmd" file="calmd.bam" ftype="bam" />
             <output name="fullfilter" file="full.bam" ftype="bam" />
         </test>
         <test>
-            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
+            <param name="input_bam" value="chr22_sample.bam" ftype="bam" />
             <param name="reference_source_selector" value="history" />
-            <param name="skip_rmdup" value="yes" />
-            <param name="ref_file" value="chr21.fa" />
-            <output name="calmd" file="normdup_calmd.bam" ftype="bam" />
-            <output name="fullfilter" file="normdup_full.bam" ftype="bam" />
+            <param name="filter_MQ_255" value="no" /> <!-- exercise the calMD-only branch; the default "yes" path is covered by the first test -->
+            <param name="ref_file" value="chr22.fa" />
+            <output name="calmd" file="calmd.bam" ftype="bam" />
         </test>
     </tests>
     <help>
 ARTbio bam cleaning overview
 ============================
 
+.. class:: infomark
+
 This tool is wrapping several cleaning steps to produce bam files suitable for subsequent
 analyses with lumpy-smoove (or other large structural variation callers) or with
-somatic-varscan (or small structural variation callers)
+somatic-varscan (or other small structural variation callers)
 
 
 Workflow 
 =============
 
+.. class:: infomark
+
 The tool is using the following command line for filtering:
 
 ::
 
-    sambamba view -h -t 8 --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
-    &#124; samtools rmdup - -
-    &#124;tee $input_base".filt1.dedup.bam" &#124; bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
+    sambamba view -h -t 8 --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped) and not(duplicate)' -f 'bam' $input_base".bam"
+    &#124; bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
     &#124; samtools calmd  -C 50 -b -@ 4 - reference.fa &gt; $input_base".filt1.dedup.bamleft.calmd.bam" ;
     sambamba view -h -t 8 --filter='mapping_quality &lt;&#61; 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"
     
+.. class:: warningmark
+
+From version **1.7+galaxy0**, this tool assumes that the input bam already has its
+optical/PCR duplicate alignments marked appropriately in their flag value. If it is not the
+case, it may be necessary to use a tool that performs this job, for instance samtools markdup,
+or sambamba markdup.
+
 Purpose
 --------