Mercurial > repos > jjohnson > bamutil_diff
comparison bamutil_diff.xml @ 0:2cafa8420c04 draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/bamutil/ commit c1945909ca200610f128577b68a82d9228905f3d-dirty"
| author | jjohnson |
|---|---|
| date | Fri, 26 Mar 2021 13:16:53 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2cafa8420c04 |
|---|---|
| 1 <tool id="bamutil_diff" name="BamUtil diff" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> | |
| 2 <description>two coordinate sorted SAM/BAM files</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements" /> | |
| 7 <!-- Cheetah template that assembles the "bam diff" command line. Both inputs are single-quoted. Field flags are emitted only for the "all" and "select" choices; the "default" choice adds none. The value of output_as is passed as the output file name, and it matches the from_work_dir names used in the outputs section. --><command detect_errors="exit_code"><![CDATA[ | |
| 8 bam diff | |
| 9 --in1 '$in1' | |
| 10 --in2 '$in2' | |
| 11 #if $fields.choice == 'all': | |
| 12 --all | |
| 13 #elif $fields.choice == 'select': | |
| 14 $fields.flag | |
| 15 $fields.mapQual | |
| 16 $fields.mate | |
| 17 $fields.isize | |
| 18 $fields.seq | |
| 19 $fields.baseQual | |
| 20 $fields.noCigar | |
| 21 $fields.noPos | |
| 22 #if $fields.samtags.tagchoice == 'everyTag': | |
| 23 --everyTag | |
| 24 #elif $fields.samtags.tagchoice == 'specify': | |
| 25 --tags '$fields.samtags.tags' | |
| 26 #end if | |
| 27 #end if | |
| 28 --posDiff $posDiff | |
| 29 --recPoolSize -1 | |
| 30 $onlyDiffs | |
| 31 --params | |
| 32 --noPhoneHome | |
| 33 --out $output_as | |
| 34 ]]></command> | |
| 35 <!-- Tool form: two SAM/BAM inputs, the position window, the onlyDiffs switch, the choice of fields to compare, and the output format. Each boolean carries its command-line flag as truevalue (empty when unchecked) so the command template can emit the parameter verbatim. --><inputs> | |
| 36 <param argument="--in1" type="data" format="sam,bam" label="Input BAM 1"/> | |
| 37 <param argument="--in2" type="data" format="sam,bam" label="Input BAM 2"/> | |
| 38 <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/> | |
| 39 <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/> | |
| 40 <!-- "default" and "all" need no further parameters; "select" exposes one switch per field plus the tag selection. --><conditional name="fields"> | |
| 41 <param name="choice" type="select" label="BAM fields to diff"> | |
| 42 <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option> | |
| 43 <option value="all">Diff all the SAM/BAM fields</option> | |
| 44 <option value="select">Select SAM/BAM fields to diff</option> | |
| 45 </param> | |
| 46 <when value="default"/> | |
| 47 <when value="all"/> | |
| 48 <when value="select"> | |
| 49 <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/> | |
| 50 <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/> | |
| 51 <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/> | |
| 52 <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/> | |
| 53 <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/> | |
| 54 <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/> | |
| 55 <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the cigars."/> | |
| 56 <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/> | |
| 57 <conditional name="samtags"> | |
| 58 <param name="tagchoice" type="select" label="Tags to diff"> | |
| 59 <option value="none">Do not diff tags</option> | |
| 60 <option value="everyTag">Diff every tag</option> | |
| 61 <option value="specify">Specify tags to diff</option> | |
| 62 </param> | |
| 63 <when value="none"/> | |
| 64 <when value="everyTag"/> | |
| 65 <when value="specify"> | |
| 66 <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type..."> | |
| 67 <!-- Restricts the value to a comma-separated list of two-character tag names, each followed by a colon and one type letter. --><validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator> | |
| 68 </param> | |
| 69 </when> | |
| 70 </conditional> | |
| 71 </when> | |
| 72 </conditional> | |
| 73 <!-- The option values double as the output file name given to the tool and are what the output filters test against. --><param name="output_as" type="select" label="Output format"> | |
| 74 <option value="diff.txt">ASCII text diff file</option> | |
| 75 <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option> | |
| 76 <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option> | |
| 77 </param> | |
| 78 </inputs> | |
| 79 <!-- Seven candidate datasets; each filter keeps a dataset only when output_as equals the matching file name, so a run yields either one text file, three BAM files, or three SAM files. The only1/only2 files are collected with a wildcard, presumably because the tool appends an input-derived suffix to their names (confirm against the bam diff documentation). --><outputs> | |
| 80 <data name="diff_bam" format="bam" from_work_dir="diff.bam" label="${tool.name} on ${on_string}: diff.bam"> | |
| 81 <filter>output_as == 'diff.bam'</filter> | |
| 82 </data> | |
| 83 <data name="diff_only1_bam" format="bam" from_work_dir="diff_only1_*.bam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}"> | |
| 84 <filter>output_as == 'diff.bam'</filter> | |
| 85 </data> | |
| 86 <data name="diff_only2_bam" format="bam" from_work_dir="diff_only2_*.bam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}"> | |
| 87 <filter>output_as == 'diff.bam'</filter> | |
| 88 </data> | |
| 89 <data name="diff_sam" format="sam" from_work_dir="diff.sam" label="${tool.name} on ${on_string}: diff.sam"> | |
| 90 <filter>output_as == 'diff.sam'</filter> | |
| 91 </data> | |
| 92 <data name="diff_only1_sam" format="sam" from_work_dir="diff_only1_*.sam" label="${tool.name} on ${on_string} only in: ${in1.element_identifier}"> | |
| 93 <filter>output_as == 'diff.sam'</filter> | |
| 94 </data> | |
| 95 <data name="diff_only2_sam" format="sam" from_work_dir="diff_only2_*.sam" label="${tool.name} on ${on_string} only in: ${in2.element_identifier}"> | |
| 96 <filter>output_as == 'diff.sam'</filter> | |
| 97 </data> | |
| 98 <data name="diff_txt" format="txt" from_work_dir="diff.txt" label="${tool.name} on ${on_string}: diff.txt"> | |
| 99 <filter>output_as == 'diff.txt'</filter> | |
| 100 </data> | |
| 101 </outputs> | |
| 102 <tests> | |
| 103 <!-- Test-1: default field comparison of in1.sam against in2.sam with text output. The single diff_txt output is checked both against the expected file and by content assertions. The angle brackets in the expressions are written as entities because a literal less-than sign is not allowed inside an XML attribute value. --> | |
| 104 <test> | |
| 105 <param name="in1" ftype="sam" value="in1.sam"/> | |
| 106 <param name="in2" ftype="sam" value="in2.sam"/> | |
| 107 <param name="posDiff" value="100000"/> | |
| 108 <param name="onlyDiffs" value="true"/> | |
| 109 <conditional name="fields"> | |
| 110 <param name="choice" value="default"/> | |
| 111 </conditional> | |
| 112 <param name="output_as" value="diff.txt"/> | |
| 113 <output name="diff_txt" file="diff.txt"> | |
| 115 <assert_contents> | |
| 116 <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" /> | |
| 117 <has_text_matching expression="&lt;\t1a3\t74M74N1M" /> | |
| 118 <has_text_matching expression="&gt;\ta3\t74M66N1M" /> | |
| 119 </assert_contents> | |
| 120 </output> | |
| 121 </test> | |
| 122 | |
| 123 <!-- Test-2: "select" field comparison (flag, seq, and the tags AS:i and MD:Z) of in1.sam against in2.sam with SAM output. Asserts which read names appear in, and are absent from, each of the three SAM outputs. --> | |
| 124 <test> | |
| 125 <param name="in1" ftype="sam" value="in1.sam"/> | |
| 126 <param name="in2" ftype="sam" value="in2.sam"/> | |
| 127 <param name="posDiff" value="100000"/> | |
| 128 <param name="onlyDiffs" value="true"/> | |
| 129 <conditional name="fields"> | |
| 130 <param name="choice" value="select"/> | |
| 131 <param name="flag" value="true"/> | |
| 132 <param name="seq" value="true"/> | |
| 133 <conditional name="samtags"> | |
| 134 <param name="tagchoice" value="specify"/> | |
| 135 <param name="tags" value="AS:i,MD:Z"/> | |
| 136 </conditional> | |
| 137 </conditional> | |
| 138 <param name="output_as" value="diff.sam"/> | |
| 139 <output name="diff_sam"> | |
| 140 <assert_contents> | |
| 141 <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228" /> | |
| 142 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
| 143 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
| 144 </assert_contents> | |
| 145 </output> | |
| 146 <output name="diff_only1_sam"> | |
| 147 <assert_contents> | |
| 148 <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
| 149 <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
| 150 <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG" /> | |
| 151 <has_text text="AS:i:0" /> | |
| 152 <has_text text="MD:Z:75" /> | |
| 153 </assert_contents> | |
| 154 </output> | |
| 155 <output name="diff_only2_sam"> | |
| 156 <assert_contents> | |
| 157 <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" /> | |
| 158 <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" /> | |
| 159 <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA" /> | |
| 160 <has_text text="AS:i:0" /> | |
| 161 <has_text text="MD:Z:75" /> | |
| 162 </assert_contents> | |
| 163 </output> | |
| 164 </test> | |
| 165 <!-- Test-3: default field comparison of in1.sam against in3.sam with text output. Asserts that the read name checked for in Test-1 does not appear in the diff. --> | |
| 166 <test> | |
| 167 <param name="in1" ftype="sam" value="in1.sam"/> | |
| 168 <param name="in2" ftype="sam" value="in3.sam"/> | |
| 169 <param name="posDiff" value="100000"/> | |
| 170 <param name="onlyDiffs" value="true"/> | |
| 171 <conditional name="fields"> | |
| 172 <param name="choice" value="default"/> | |
| 173 </conditional> | |
| 174 <param name="output_as" value="diff.txt"/> | |
| 175 <output name="diff_txt"> | |
| 176 <assert_contents> | |
| 177 <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" /> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 </test> | |
| 181 </tests> | |
| 182 <help><![CDATA[ | |
| 183 **bamUtil diff** | |
| 184 | |
| 185 The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools. | |
| 186 The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different. | |
| 187 diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work. | |
| 188 By default, just the chromosome/position and cigar are compared for each record. | |
| 189 Note: The headers are not compared. | |
| 190 | |
| 191 Options are available to compare:: | |
| 192 | |
| 193 - all fields | |
| 194 - flags | |
| 195 - mapping quality | |
| 196 - mate chromosome/position | |
| 197 - insert size | |
| 198 - sequence | |
| 199 - base quality | |
| 200 - specified tags | |
| 201 - all tags | |
| 202 - turn off position comparison | |
| 203 - turn off cigar comparison | |
| 204 | |
| 205 **Inputs** | |
| 206 Two BAM or SAM alignment files | |
| 207 | |
| 208 **Outputs** | |
| 209 Choice of 2 Output Formats: | |
| 210 | |
| 211 :: | |
| 212 | |
| 213 **Diff Format** | |
| 214 There are 2 types of differences. | |
| 215 ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff | |
| 216 ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different | |
| 217 Each difference output consists of 2 or 3 lines. If the record appears in only one of the files, the diff is 2 lines; if it appears in both files, the diff is 3 lines. | |
| 218 The first line of the difference output is just the read name. | |
| 219 The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'. | |
| 220 The 2nd line is the flag followed by the diff'd fields from one of the records. | |
| 221 The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record. | |
| 222 The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified:: | |
| 223 | |
| 224 - '<' or '>' | |
| 225 - flag | |
| 226 - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified | |
| 227 - cigar - if --noCigar is not specified | |
| 228 - mapping quality - if --mapQual or --all is specified | |
| 229 - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified | |
| 230 - insert size - if --isize or --all is specified | |
| 231 - sequence - if --seq or --all is specified | |
| 232 - base quality - if --baseQual or --all is specified | |
| 233 - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified | |
| 234 | |
| 235 | |
| 236 **BAM Format** | |
| 237 In SAM/BAM format there will be 3 output files:: | |
| 238 | |
| 239 1. the specified name with record diffs | |
| 240 2. specified name with _only1_<in1>.sam/bam with records only in the in1 file | |
| 241 3. specified name with _only2_<in2>.sam/bam with records only in the in2 file | |
| 242 | |
| 243 Records that are identical in the two files are not written in any of these output files. | |
| 244 When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags:: | |
| 245 | |
| 246 - ZF - Flag | |
| 247 - ZP - Chromosome:1-based Position | |
| 248 - ZC - Cigar | |
| 249 - ZM - Mapping Quality | |
| 250 - ZN - Chromosome:1-based Mate Position | |
| 251 - ZI - Insert Size | |
| 252 - ZS - Sequence | |
| 253 - ZQ - Base Quality | |
| 254 - ZT - Tags | |
| 255 | |
| 256 If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags. | |
| 257 | |
| 258 | |
| 259 | |
| 260 | |
| 261 https://genome.sph.umich.edu/wiki/BamUtil:_diff | |
| 262 | |
| 263 ]]></help> | |
| 264 <expand macro="citations" /> | |
| 265 </tool> |
