Mercurial > repos > artbio > artbio_bam_cleaning
diff artbio_bam_cleaning.xml @ 0:65d6d2b554b3 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/artbio_bam_cleaning commit adfad19ff505ac7baa3688997bfa9f64243aaace"
author | artbio |
---|---|
date | Fri, 02 Oct 2020 00:17:33 +0000 |
parents | |
children | b550841f568b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/artbio_bam_cleaning.xml Fri Oct 02 00:17:33 2020 +0000
@@ -0,0 +1,99 @@
+<tool id="artbio_bam_cleaning" name="ARTbio bam cleaning" version="1.6+galaxy0">
+    <!--
+        Galaxy tool wrapper that chains sambamba, samtools and bamleftalign in one
+        job to filter, de-duplicate, left-align and MD-recalibrate a BAM file.
+    -->
+    <description>
+        on flags and PCR Duplicates and MD recalibration
+    </description>
+    <macros>
+        <!-- macro.xml is not part of this diff; it is expected to define the pipefail
+             and set_fasta_index tokens and the reference_source_conditional macro. -->
+        <import>macro.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.6">samtools</requirement>
+        <requirement type="package" version="0.7.1">sambamba</requirement>
+        <!-- presumably pulled in for the bamleftalign executable (TODO confirm) -->
+        <requirement type="package" version="1.3.2">freebayes</requirement>
+    </requirements>
+    <stdio>
+        <!-- Any non-zero exit code is reported as a fatal error.
+             NOTE(review): detect_errors="exit_code" on the command tag looks redundant with this block; confirm before removing either. -->
+        <exit_code range="1:" level="fatal" description="Error occurred" />
+    </stdio>
+    <command detect_errors="exit_code"><![CDATA[
+        ## The two tokens below are expanded from macro.xml (shell options and,
+        ## presumably, creation of reference.fa and its index).
+        @pipefail@
+        @set_fasta_index@
+        #set input_base = 'input'
+        ## Expose the input BAM and its index under a fixed base name. Dataset paths
+        ## are single-quoted so unusual characters cannot split the shell words.
+        ln -f -s '$input_bam.metadata.bam_index' $input_base".bam.bai" &&
+        ln -f -s '$input_bam' $input_base".bam" &&
+        ## Pass 1: keep mapped reads with MAPQ of at least 1 whose mate is also mapped,
+        ## remove duplicates, keep a copy of that stream, left-align, then run calmd.
+        ## Thread counts follow GALAXY_SLOTS instead of a fixed value.
+        sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
+        | samtools rmdup - -
+        | tee $input_base".filt1.dedup.bam" | bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
+        | samtools calmd -C 50 -b -@ \${GALAXY_SLOTS:-2} - reference.fa > $input_base".filt1.dedup.bamleft.calmd.bam" &&
+        ## Pass 2: additionally require a mapping quality of at most 254.
+        sambamba view -h -t \${GALAXY_SLOTS:-2} --filter='mapping_quality <= 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"
+    ]]></command>
+    <inputs>
+        <!-- Reference genome selection; the macro body lives in macro.xml. -->
+        <expand macro="reference_source_conditional" />
+        <!-- Only BAM is accepted: the command needs the dataset's BAM index. -->
+        <param name="input_bam" type="data" format="bam" label="BAM file to process"/>
+    </inputs>
+    <outputs>
+        <!-- Result of pass 1 (filtered, deduplicated, left-aligned, calmd). -->
+        <data name="calmd" format="bam" label="CalMD filter (for lumpy-smoove)" from_work_dir="./input.filt1.dedup.bamleft.calmd.bam"/>
+        <!-- Result of pass 2 (pass 1 plus the mapping quality ceiling). -->
+        <data name="fullfilter" format="bam" label="Full filtering (for somatic-varscan)" from_work_dir="./input.filt1.dedup.bamleft.calmd.filt2.bam"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" value="chr21.fa" />
+            <output name="calmd" file="match_chr21_DBA_974.filt1.dedup.bamleft.calmd.bam" ftype="bam" />
+            <output name="fullfilter" file="match_chr21_DBA_974.filt1.dedup.bamleft.calmd.filt2.bam" ftype="bam" />
+        </test>
+    </tests>
+    <help><![CDATA[
+ARTbio bam cleaning overview
+============================
+
+This tool is wrapping several cleaning steps to produce bam files suitable for subsequent
+analyses with lumpy-smoove (or other large structural variation callers) or with
+somatic-varscan (or small structural variation callers)
+
+
+Workflow
+=============
+
+The tool is using the following command line for filtering:
+
+::
+
+    sambamba view -h -t ${GALAXY_SLOTS:-2} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
+    | samtools rmdup - -
+    | tee $input_base".filt1.dedup.bam" | bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
+    | samtools calmd -C 50 -b -@ ${GALAXY_SLOTS:-2} - reference.fa > $input_base".filt1.dedup.bamleft.calmd.bam" &&
+    sambamba view -h -t ${GALAXY_SLOTS:-2} --filter='mapping_quality <= 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"
+
+Purpose
+--------
+
+This "workflow" tool was generated in order to limit the number of ``python metadata/set.py`` jobs
+which occur at each step of standard galaxy workflows. Indeed, these jobs are poorly optimized and may last considerable
+amounts of time when datasets are large, at each step, lowering the overall performance of the workflow.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1371/journal.pone.0168397</citation>
+    </citations>
+</tool>