comparison clip_overlap.xml @ 0:79725ecf10a3 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bamutil commit 29e40a76f1e249c3ed73f9129ad711beba34eb07"
author iuc
date Mon, 29 Mar 2021 14:15:42 +0000
parents
children 047a20d4258f
comparison
equal deleted inserted replaced
-1:000000000000 0:79725ecf10a3
1 <tool id="bamutil_clip_overlap" name="BamUtil clipOverlap" version="@WRAPPER_VERSION@" profile="@PROFILE@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros> <!-- macros.xml supplies the "requirements", "edam" and "citations" macros expanded in this file; presumably also the @WRAPPER_VERSION@ and @PROFILE@ tokens used on the tool element (confirm in macros.xml) -->
6 <expand macro="requirements"/>
7 <expand macro="edam"/>
8 <command detect_errors="exit_code"><![CDATA[
9 ## clipOverlap picks SAM or BAM output from the extension of the file passed to --out,
10 ## so write to a temporary file named after the input type and move it onto the output dataset afterwards.
11 #if $input.ext.endswith('bam'):
12 #set tmp_out = 'output.bam'
13 #else:
14 #set tmp_out = 'output.sam'
15 #end if
16 trap '>&2 cat output.log' EXIT;
17 touch 'output.log' &&
18 bam clipOverlap
19 --in '$input'
20 #if str($storeOrig):
21 --storeOrig '$storeOrig'
22 #end if
23 $stats
24 #if str($input.ext) == 'qname_sorted.bam':
25 --readName
26 #end if
27 $overlapsOnly
28 #if str($excludeFlags):
29 --excludeFlags $excludeFlags
30 #end if
31 $unmapped
32 --noPhoneHome
33 --out '$tmp_out'
34 2> 'output.log'
35 && mv '$tmp_out' '$output'
36 #if str($stats):
37 && cp 'output.log' '$output_stats'
38 #end if
39 ]]></command> <!-- stderr of bam clipOverlap is redirected to output.log; the EXIT trap echoes that log to the job's stderr, and the log is copied to output_stats when the stats option is on -->
40 <inputs> <!-- Each parameter is referenced in the command template by its name or, for argument-style params, by the argument without the leading dashes (e.g. $storeOrig). -->
41 <param name="input" type="data" format="sam,bam,qname_sorted.bam" label="Select SAM or BAM file on which to clip overlapping read pairs"/> <!-- a qname_sorted.bam input makes the command add the readName flag -->
42 <param argument="--storeOrig" type="text" value="" label="Enter a tag in which to store the original CIGAR" help="Leave blank to skip">
43 <sanitizer invalid_char="">
44 <valid initial="string.letters,string.digits"/>
45 </sanitizer> <!-- only letters and digits are whitelisted; presumably every other character is replaced by invalid_char, i.e. removed (confirm against the Galaxy sanitizer documentation) -->
46 </param>
47 <param argument="--stats" type="boolean" truevalue="--stats" falsevalue="" checked="false" label="Output statistics on the overlaps?"/> <!-- boolean params here carry their flag text as truevalue and an empty falsevalue, so the command can emit them unconditionally -->
48 <param argument="--overlapsOnly" type="boolean" truevalue="--overlapsOnly" falsevalue="" checked="false" label="Only output overlapping read pairs?"/>
49 <param argument="--excludeFlags" type="integer" optional="true" value="" label="Enter an integer representation of a flag to skip records with any of the specified flags set" help="See the help section below for information about this option"/> <!-- no default value: the flag is only passed when a value is entered (the command guards it with #if str($excludeFlags)) -->
50 <param argument="--unmapped" type="boolean" truevalue="--unmapped" falsevalue="" checked="false" label="Mark records that would be completely clipped as unmapped?"/>
51 </inputs>
52 <outputs>
53 <data name="output" format_source="input" metadata_source="input"/> <!-- clipped reads; format and metadata are taken from the input dataset -->
54 <data name="output_stats" format="txt" label="${tool.name} on ${on_string}: Statistics">
55 <filter>stats</filter> <!-- only produced when the stats option is checked; the command fills it with a copy of output.log -->
56 </data>
57 </outputs>
58 <tests>
59 <test expect_num_outputs="1"> <!-- SAM input with default options: only the clipped output is expected -->
60 <param name="input" value="input.sam" ftype="sam"/>
61 <output name="output" file="output.sam" ftype="sam"/>
62 </test>
63 <test expect_num_outputs="2"> <!-- BAM input with storeOrig and stats set: the statistics dataset is expected as a second output -->
64 <param name="input" value="input.bam" ftype="bam"/>
65 <param name="storeOrig" value="6M"/> <!-- NOTE(review): 6M reads like a CIGAR string, while storeOrig is labelled as a tag name; confirm the intended value -->
66 <param name="stats" value="--stats"/>
67 <output name="output" file="input.bam" ftype="bam"/> <!-- NOTE(review): the expected file is the test input itself; presumably no pairs are clipped in this fixture, confirm -->
68 <output name="output_stats" file="output_stats.txt" ftype="txt"/>
69 </test>
70 <test expect_num_outputs="1"> <!-- name-sorted BAM input: exercises the readName branch of the command -->
71 <param name="input" value="input_qname_sorted.bam" ftype="qname_sorted.bam"/>
72 <output name="output" file="output_qname_sorted.bam" ftype="qname_sorted.bam"/>
73 </test>
74 </tests>
75 <help>
76 **What it does**
77
78 Clips overlapping read pairs in a SAM or BAM file based on criteria.
79
80 The input file and the resulting output file are sorted by coordinate, or by read name when the input dataset has the qname_sorted.bam datatype (in that case --readName is passed automatically).
81
82 When a read is clipped from the front:
83
84 * the read start position is updated to reflect the clipping
85 * the mate's mate start position is updated to reflect the record's new position
86 * the record is placed in the output file in the correct location based on the updated position
87
88 To handle coordinate-sorted files, SAM/BAM records are buffered until it is known that all following records will have a later start position. To prevent the program from running away with memory, a limit is set on the number of records that can be buffered (see bamUtil's --poolSize option, which this wrapper does not expose).
89
90 When two mates overlap, this tool will clip the record whose clipped region would have the lowest average quality.
91
92 It also checks strand. If a forward strand extends past the end of a reverse strand, that will be clipped. Similarly, if a reverse strand starts before the forward strand, the region prior to the forward strand will be clipped. If the reverse strand occurs entirely before the forward strand, both strands will be entirely clipped. If the --unmapped option is specified, then rather than clipping an entire read, it will be marked as unmapped.
93 The qualities on the two strands remain unchanged even with clipping.
94
95 The excludeFlags option accepts a decimal value and skips records that have any of the specified flags set. If it is left blank, bamUtil's default of 3852 (0xF0C hex) applies, so records with any of the following flags set will be skipped:
96
97 * unmapped
98 * mate unmapped
99 * secondary alignment
100 * fails QC checks
101 * duplicate
102 * supplementary
103
104 **Assumptions/Restrictions**
105
106 * Assumes the file is sorted by Coordinate (or ReadName if using --readName option)
107 * Assumes only 2 reads have matching ReadNames (Supplementary and Secondary reads are ignored/skipped by default so will not cause a problem)
108
109 * It matches in pairs, so if there are 3, the first 2 will be matched and compared, but the 3rd won't. If there are 4, the first 2 will be matched and the last 2 will be matched and compared.
110
111 * Only mapped reads will be clipped
112 * Assumes that mate information in records is accurate
113
114 **Clipping from the front**
115
116 The first operation after the softclip will be a Match/Mismatch, meaning that any trailing pads, deletions, insertions, or skips will also be soft clipped.
117
118 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
119 | Clip location | How it is handled |
120 +================================+=========================================================+======================================================================================+
121 | If the clip position falls in a skip/deletion | Removes the entire skip/deletion |
122 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
123 | If the position immediately after the clip is a skip/deletion | Also removes the skip/deletion |
124 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
125 | If the position immediately after the clip is an Insert | Softclips the insert |
126 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
127 | If the position immediately after the clip is a Pad | Removes the pad |
128 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
129 | Clip occurs at the last match/mismatch position of the read (the entire read is clipped) | Entire read is soft clipped, 0-based position is left as the original (not modified) |
130 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
131 | Clip occurs after the read ends | Entire read is soft clipped, 0-based position is left as the original (not modified) |
132 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
133 | Clip occurs before the read starts | Nothing is clipped. The read is not changed. |
134 +------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------+
135
136 **Clipping from the back**
137
138 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
139 | Clip location | How it is handled |
140 +==================+=========================================================+===================================================================================================================+
141 | If the clip position falls in a skip/deletion | Removes the entire skip/deletion |
142 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
143 | If the position immediately before the clip is a deletion/skip/pad | Remove the deletion/skip/pad |
144 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
145 | If the position immediately before the clip is an insertion | Leave the insertion, even if it results in a 70M3I27S |
146 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
147 | Clip occurs at the first position of the read (the entire read is clipped) | Entire read is soft clipped, preceding insertions remain, 0-based position is left as the original (not modified) |
148 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
149 | Clip occurs before the read starts | Entire read is soft clipped, 0-based position is left as the original (not modified) |
150 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
151 | Clip occurs after the read ends | Nothing is clipped. The read is not changed. |
152 +----------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
153
154 </help>
155 <expand macro="citations"/> <!-- citation entries come from the "citations" macro, presumably defined in the imported macros.xml -->
156 </tool>
157