Mercurial > repos > artbio > artbio_bam_cleaning
comparison artbio_bam_cleaning.xml @ 0:65d6d2b554b3 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/artbio_bam_cleaning commit adfad19ff505ac7baa3688997bfa9f64243aaace"
author | artbio |
---|---|
date | Fri, 02 Oct 2020 00:17:33 +0000 |
parents | |
children | b550841f568b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:65d6d2b554b3 |
---|---|
<tool id="artbio_bam_cleaning" name="ARTbio bam cleaning" version="1.6+galaxy0">
    <description>
        on flags and PCR Duplicates and MD recalibration
    </description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.6">samtools</requirement>
        <requirement type="package" version="0.7.1">sambamba</requirement>
        <requirement type="package" version="1.3.2">freebayes</requirement>
    </requirements>
    <!-- NOTE(review): detect_errors="exit_code" on <command> already treats any
         non-zero exit status as an error, so this <stdio> block is largely
         redundant; kept as-is to avoid any behavior change. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Error occurred" />
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
        @pipefail@
        @set_fasta_index@
        #set input_base = 'input'
        ln -f -s $input_bam.metadata.bam_index input.bam.bai &&
        ln -s $input_bam input.bam &&
        ## Pass 1: drop unmapped/mate-unmapped and MAPQ 0 reads, remove PCR
        ## duplicates, left-align indels against the reference, then recompute
        ## MD/NM tags (calmd) with BAQ cap -C 50. tee keeps the intermediate
        ## deduplicated BAM on disk.
        ## Thread counts follow the Galaxy slot allocation instead of being
        ## hard-coded (fallback 8 preserves the previous default).
        sambamba view -h -t \${GALAXY_SLOTS:-8} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
        | samtools rmdup - -
        |tee $input_base".filt1.dedup.bam"| bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
        | samtools calmd -C 50 -b -@ \${GALAXY_SLOTS:-2} - reference.fa > $input_base".filt1.dedup.bamleft.calmd.bam" &&
        ## Pass 2: additionally drop MAPQ 255 ("unavailable") reads for callers
        ## such as somatic-varscan that require a valid mapping quality.
        sambamba view -h -t \${GALAXY_SLOTS:-8} --filter='mapping_quality <= 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"
    ]]></command>
    <inputs>
        <expand macro="reference_source_conditional" />
        <param name="input_bam" type="data" format="bam" label="BAM or SAM file to process"/>
    </inputs>
    <outputs>
        <data name="calmd" format="bam" label="CalMD filter (for lumpy-smoove)" from_work_dir="./input.filt1.dedup.bamleft.calmd.bam"/>
        <data name="fullfilter" format="bam" label="Full filtering (for somatic-varscan)" from_work_dir="./input.filt1.dedup.bamleft.calmd.filt2.bam"/>
    </outputs>
    <tests>
        <test>
            <param name="input_bam" value="match_chr21_DBA_974.bam" ftype="bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="chr21.fa" />
            <output name="calmd" file="match_chr21_DBA_974.filt1.dedup.bamleft.calmd.bam" ftype="bam" />
            <output name="fullfilter" file="match_chr21_DBA_974.filt1.dedup.bamleft.calmd.filt2.bam" ftype="bam" />
        </test>
    </tests>
    <help>
ARTbio bam cleaning overview
============================

This tool wraps several cleaning steps to produce bam files suitable for subsequent
analyses with lumpy-smoove (or other large structural variation callers) or with
somatic-varscan (or small structural variation callers)


Workflow
=============

The tool is using the following command line for filtering:

::

    sambamba view -h -t ${GALAXY_SLOTS:-8} --filter='mapping_quality >= 1 and not(unmapped) and not(mate_is_unmapped)' -f 'bam' $input_base".bam"
    | samtools rmdup - -
    |tee $input_base".filt1.dedup.bam" | bamleftalign --fasta-reference reference.fa -c --max-iterations "5" -
    | samtools calmd -C 50 -b -@ ${GALAXY_SLOTS:-2} - reference.fa > $input_base".filt1.dedup.bamleft.calmd.bam" ;
    sambamba view -h -t ${GALAXY_SLOTS:-8} --filter='mapping_quality <= 254' -f 'bam' -o $input_base".filt1.dedup.bamleft.calmd.filt2.bam" $input_base".filt1.dedup.bamleft.calmd.bam"

Purpose
--------

This "workflow" tool was generated in order to limit the number of ``python metadata/set.py`` jobs
which occur at each step of standard galaxy workflows. Indeed, these jobs are poorly optimized and may last considerable
amounts of time when datasets are large, at each step, lowering the overall performance of the workflow.

    </help>
    <citations>
        <citation type="doi">10.1371/journal.pone.0168397</citation>
    </citations>
</tool>