comparison samblaster.xml @ 0:aa72470e14f7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/samblaster commit 82097013a9eb5a6161d400e5b6c493113c440687
author iuc
date Mon, 19 Dec 2016 15:18:40 -0500
parents
children cf4ad98037d9
comparison
equal deleted inserted replaced
-1:000000000000 0:aa72470e14f7
1 <tool id="samblaster" name="samblaster" version="0.1.24">
2 <description>marks duplicates, outputs split reads, discordant read pairs and unmapped reads</description>
3 <requirements> <!-- samblaster is the wrapped program; sambamba is called by the command section for SAM/BAM conversion and sorting. -->
4 <requirement type="package" version="0.1.24">samblaster</requirement>
5 <requirement type="package" version="0.6.5">sambamba</requirement>
6 </requirements>
7 <version_command>samblaster --version</version_command>
8 <command detect_errors="exit_code"><![CDATA[
## Build the shell expression that feeds the name sort below: SAM input is
## converted to BAM through a process substitution, any other input is passed
## as a quoted path.
## NOTE(review): the GALAXY_SLOTS reference inside the string literal on the
## first set line is not backslash-escaped, unlike the ones further down;
## presumably it reaches the shell verbatim because it sits inside a plain
## string literal. Confirm before "fixing" the escaping.
9 #if $input.is_of_type('sam'):
10 #set stream="<(sambamba view -S -f bam -t ${GALAXY_SLOTS:-4} -h '%s')" % $input
11 #else:
12 #set stream="'%s'" % $input
13 #end if
## Sort the input with sambamba (-n, presumably by read name as the help text
## requires read-id grouped input) and pipe it, header included, into samblaster.
## Every boolean parameter below expands to its truevalue/falsevalue string.
14 sambamba view -t \${GALAXY_SLOTS:-4} -h <(sambamba sort -t \${GALAXY_SLOTS:-4} -n $stream -o /dev/stdout) |
15 samblaster
16 $output
17 $discordantFile
18 $splitterFile
19 $unmappedFile
20 $acceptDupMarks
21 $excludeDups
22 $removeDups
23 $addMateTags
24 $compatibility_mode
25 --maxSplitCount '$maxSplitCount'
26 --maxUnmappedBases '$maxUnmappedBases'
27 --minIndelSize '$minIndelSize'
28 --minNonOverlap '$minNonOverlap'
29 --minClipSize '$minClipSize'
## Each requested SAM file written by samblaster is converted to BAM and sorted
## into the file name that the outputs section collects from the working directory.
## The main output is skipped when the boolean expanded to its falsevalue.
30 #if $output != "-o /dev/null":
31 && sambamba sort -o output.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam output.sam)
32 #end if
33 #if $discordantFile:
34 && sambamba sort -o discordant.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam discordant.sam)
35 #end if
36 #if $splitterFile:
37 && sambamba sort -o splitter.bam -l 6 -t \${GALAXY_SLOTS:-4} <(sambamba view -S -f bam splitter.sam)
38 #end if
39 ]]></command>
40 <inputs> <!-- Boolean params are inserted into the command line as their truevalue/falsevalue string. samblaster short flags: -a = acceptDupMarks, -e = excludeDups, -r = removeDups; each param must emit its own flag. -->
41 <param argument="--input" type="data" format="bam,sam"/>
42 <param argument="--output" label="Output bam file for all input alignments" type="boolean" checked="true" truevalue="-o output.sam" falsevalue="-o /dev/null"/>
43 <param argument="--discordantFile" label="Output discordant read pairs?" type="boolean" truevalue="-d discordant.sam" falsevalue=""/>
44 <param argument="--splitterFile" label="Output split reads?" type="boolean" truevalue="-s splitter.sam" falsevalue=""/>
45 <param argument="--unmappedFile" label="Output unmapped/clipped reads as FASTQ?" type="boolean" truevalue="-u unmapped.fastq" falsevalue=""/>
46 <param argument="--acceptDupMarks" label="Accept duplicate marks already in input file instead of looking for duplicates in the input?" type="boolean" truevalue="-a" falsevalue=""/>
47 <param argument="--excludeDups" label="Exclude reads marked as duplicates from discordant, splitter, and/or unmapped file?" type="boolean" truevalue="-e" falsevalue=""/>
48 <param argument="--removeDups" label="Remove duplicates reads from all output files?" help="(Implies --excludeDups)" type="boolean" truevalue="-r" falsevalue=""/>
49 <param argument="--addMateTags" label="Add MC and MQ tags?" type="boolean" truevalue="--addMateTags" falsevalue=""/>
50 <param name="compatibility_mode" argument="-M" label="Run in compatibility mode?" help="Both 0x100 and 0x800 are considered chimeric. Similar to BWA MEM -M option." type="boolean" truevalue="-M" falsevalue="" />
51 <param argument="--maxSplitCount" label="Maximum number of split alignments for a read to be included in splitter file." type="integer" value="2"/>
52 <param argument="--maxUnmappedBases" label="Maximum number of un-aligned bases between two alignments to be included in splitter file." type="integer" value="50" min="1"/>
53 <param argument="--minIndelSize" label="Minimum structural variant feature size for split alignments to be included in splitter file." type="integer" value="50" min="1"/>
54 <param argument="--minNonOverlap" label="Minimum non-overlaping base pairs between two alignments for a read to be included in splitter file." type="integer" value="20" min="1"/>
55 <param argument="--minClipSize" label="Minumum number of bases a mapped read must be clipped to be included in unmapped file." type="integer" value="20" min="1"/>
56 </inputs>
57 <outputs> <!-- Each dataset is picked up from the job working directory under the file name the command section writes; its filter names the boolean input that switches that output on. -->
58 <data name="output_bam" format="bam" from_work_dir="output.bam" label="samblaster alignments on ${on_string}">
59 <filter>output</filter>
60 </data>
61 <data name="discordant_bam" format="bam" from_work_dir="discordant.bam" label="samblaster discordant alignments on ${on_string}">
62 <filter>discordantFile</filter>
63 </data>
64 <data name="splitter_bam" format="bam" from_work_dir="splitter.bam" label="samblaster split alignments on ${on_string}">
65 <filter>splitterFile</filter>
66 </data>
67 <data name="unmapped_fastq" format="fastqsanger" from_work_dir="unmapped.fastq" label="samblaster unmapped fastq on ${on_string}">
68 <filter>unmappedFile</filter>
69 </data>
70 </outputs>
71 <tests> <!-- Both tests use the same options (all-alignments, splitter and unmapped outputs on; discordant output off): the first on BAM input, the second on input declared as SAM. Param names must match the input names exactly. -->
72 <test>
73 <param name="input" value="sr.input.bam"/>
74 <param name="output" value="true"/>
75 <param name="discordantFile" value="false"/>
76 <param name="splitterFile" value="true"/>
77 <param name="unmappedFile" value="true"/>
78 <output name="output_bam" file="output.bam" compare="sim_size"/>
79 <output name="splitter_bam" file="splitters.bam" compare="sim_size"/>
80 <output name="unmapped_fastq">
81 <assert_contents>
82 <has_line line="@M00860:26:000000000-A6UGV:1:1101:10000:6072" />
83 </assert_contents>
84 </output>
85 </test>
86 <test>
87 <param name="input" value="sr.input.sam.gz" ftype="sam"/>
88 <param name="output" value="true"/>
89 <param name="discordantFile" value="false"/>
90 <param name="splitterFile" value="true"/>
91 <param name="unmappedFile" value="true"/>
92 <output name="output_bam" file="output.bam" compare="sim_size"/>
93 <output name="splitter_bam" file="splitters.bam" compare="sim_size"/>
94 <output name="unmapped_fastq">
95 <assert_contents>
96 <has_line line="@M00860:26:000000000-A6UGV:1:1101:10000:6072" />
97 </assert_contents>
98 </output>
99 </test>
100 </tests>
101 <help><![CDATA[
102
103 *samblaster*
104 ============
105
106 Summary
107 -------
108
109 *samblaster* is a fast and flexible program for marking duplicates in
110 **read-id grouped** paired-end SAM files. It can also optionally output
111 discordant read pairs and/or split read mappings to separate SAM files,
112 and/or unmapped/clipped reads to a separate FASTQ file. When marking
113 duplicates, *samblaster* will require approximately 20MB of memory per
114 1M read pairs.
115
116 Usage
117 -----
118
119 See the `SAM File Format
120 Specification <http://samtools.sourceforge.net/SAMv1.pdf>`__ for details
121 about the SAM alignment format.
122
123 By default, samblaster marks duplicates with SAM FLAG 0x400. The
124 **--removeDups** option will instead remove duplicate alignments from the
125 output file.
126
127 **ALIGNMENT TYPE DEFINITIONS:** Below, we will use the following
128 definitions for alignment types. Starting with *samblaster* release
129 0.1.22, these definitions are affected by the use of the **-M** option.
130 By default, *samblaster* will use the current definitions of alignment
131 types as specified in the `SAM
132 Specification <http://samtools.sourceforge.net/SAMv1.pdf>`__. Namely,
133 alignments marked with FLAG 0x100 are considered *secondary*, while
134 those marked with FLAG 0x800 are considered *supplemental*. If the
135 **-M** option is specified, alignments marked with either FLAG 0x100 or
136 0x800 are considered *supplemental*, and no alignments are considered
137 *secondary*. A *primary* alignment is always one that is neither
138 *secondary* nor *supplemental*. Only *primary* and *supplemental*
139 alignments are used to find chimeric (split-read) mappings. The **-M**
140 flag is used for backward compatibility with older SAM/BAM files in
141 which "chimeric" alignments were marked with FLAG 0x100, and should also
142 be used with output from more recent runs of *bwa mem* using its **-M**
143 option.
144
145 **DISCORDANT READ PAIR IDENTIFICATION:** A **discordant** read pair is
146 one which meets all of the following criteria:
147
148 1. Both sides of the read pair are mapped (neither FLAG 0x4 nor 0x8 is
149 set).
150 2. The *properly paired* FLAG (0x2) is not set.
151 3. *Secondary* or *supplemental* alignments are never output as
152 discordant, although a discordant read pair can have such alignments
153 associated with them.
154 4. Duplicate read pairs that meet the above criteria will be output as
155 discordant unless the **-e** option is used.
156
157 **UNMAPPED/CLIPPED READ IDENTIFICATION:** An **unmapped** or **clipped**
158 read is a *primary* alignment that is unaligned over all or part of its
159 length respectively. The lack of a full alignment may be caused by a SV
160 breakpoint that falls within the read. Therefore, *samblaster* will
161 optionally output such reads to a FASTQ file for re-alignment by a tool,
162 such as `YAHA <https://github.com/GregoryFaust/yaha/>`__, geared toward
163 finding split-read mappings. *samblaster* applies the following strategy
164 to identify and output unmapped/clipped reads:
165
166 1. An **unmapped** read has the *unmapped read* FLAG set (0x4).
167 2. A **clipped** read is a mapped read with a CIGAR string that begins
168 or ends with at least **--minClipSize** unaligned bases (CIGAR code S
169 and/or H), and is not from a read that has one or more *supplemental*
170 alignments.
171 3. In order for *samblaster* to output the entire sequence for clipped
172 reads, the input SAM file must have soft clipped primary alignments.
173 4. *samblaster* will output unmapped/clipped reads into a FASTQ file if
174 QUAL information is available in the input file, and a FASTA file if
175 not.
176 5. Unmapped/clipped reads that are part of a duplicate read pair will be
177 output unless the **-e** option is used.
178
179
180 **Written by:** Greg Faust (gf4ea@virginia.edu) `Ira Hall Lab,
181 University of Virginia <http://faculty.virginia.edu/irahall/>`__
182
183 **Please cite:** `Faust, G.G. and Hall, I.M., “\ *SAMBLASTER*: fast
184 duplicate marking and structural variant read extraction,”
185 *Bioinformatics* Sept. 2014; **30**\ (17):
186 2503-2505. <http://bioinformatics.oxfordjournals.org/content/30/17/2503>`__
187
188 **Also see:** `SAMBLASTER\_Supplemental.pdf
189 <https://github.com/GregoryFaust/samblaster/raw/master/SAMBLASTER_Supplemental.pdf>`__
190 for additional discussion and statistics about the duplicates marked by
191 *samblaster* vs. *Picard* using the NA12878 sample dataset. Click the
192 preceding link or download the file from this repository.

193 **Written by:** Greg Faust (gf4ea@virginia.edu) `Ira Hall Lab,
194 University of Virginia <http://faculty.virginia.edu/irahall/>`__
195
196 **Please cite:** `Faust, G.G. and Hall, I.M., “\ *SAMBLASTER*: fast
197 duplicate marking and structural variant read extraction,”
198 *Bioinformatics* Sept. 2014; **30**\ (17):
199 2503-2505. <http://bioinformatics.oxfordjournals.org/content/30/17/2503>`__
200
201 **Also see:** `SAMBLASTER\_Supplemental.pdf
202 <https://github.com/GregoryFaust/samblaster/raw/master/SAMBLASTER_Supplemental.pdf>`__
203 for additional discussion and statistics about the duplicates marked by
204 *samblaster* vs. *Picard* using the NA12878 sample dataset. Click the
205 preceding link or download the file from this repository.
206
207 ]]></help>
208 <citations> <!-- Presumably the DOI of the Faust and Hall Bioinformatics article listed under "Please cite" in the help text; confirm it resolves to that paper. -->
209 <citation type="doi">10.1093/bioinformatics/btu314</citation>
210 </citations>
211 </tool>