annotate MDtag_filter.xml @ 0:447b7748eb83 draft

Uploaded tool config xml
author boris
date Tue, 24 Apr 2012 12:13:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
1 <tool id="MDtag_filter" name="Filter mapped reads" version="1.0.2">
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
2 <description>on MD tag string</description>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
3 <command interpreter="python">MDtag_filter.py $in_sam $n $m $out_sam $saveDiscarded $discarded_sam</command>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
4 <inputs>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
5 <param format="sam" name="in_sam" type="data" label="Input SAM file"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
6 <param name="n" type="integer" value='0' label="5' end window (n)" help="Any number of mismatches within this window will cause the read to be discarded"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
7 <param name="m" type="integer" value='0' label="3' end window (m)" help="Any number of mismatches within this window will cause the read to be discarded"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
8 <param name="saveDiscarded" label="Save discarded reads in additional SAM file?" type="boolean" truevalue="yes" falsevalue="no" checked="False"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
9 </inputs>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
10 <outputs>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
11 <data format="sam" name="out_sam" label="MDtag_filter_(selected)_from_${in_sam.name}" metadata_source="in_sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
12 <data format="sam" name="discarded_sam" label="MDtag_filter_(discarded)_from_${in_sam.name}" metadata_source="in_sam">
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
13 <filter> saveDiscarded is True </filter>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
14 </data>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
15 </outputs>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
16 <tests>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
17 <test>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
18 <param name="in_sam" value="test_for_md_filter.sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
19 <param name="n" value="5"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
20 <param name="m" value="5"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
21 <output name="out_sam" file="test_md_filtered.sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
22 </test>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
23 <test>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
24 <param name="in_sam" value="test_for_md_filter.sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
25 <param name="n" value="5"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
26 <param name="m" value="5"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
27 <param name="saveDiscarded" value="yes"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
28 <output name="out_sam" file="test_md_selected.sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
29 <output name="discarded_sam" file="test_md_discarded.sam"/>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
30 </test>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
31 </tests>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
32
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
33 <help>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
34
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
35 Mismatches at either end of a mapped read are most likely sequencing errors.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
36 This tool aims to control the variation noise due to potential sequencing errors.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
37
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
38 -----
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
39
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
40 .. class:: infomark
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
41
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
42 **What it does**
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
43
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
44 This tool reads the MD tag of mapped reads (see SAM format specification). The user defines the 5' and 3' windows **n** and **m** (in bp), respectively.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
45 The mapped read is discarded if it contains one or more mismatches within **n** bases of the read 5' end or within **m** bases of the read 3' end.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
46 Option: save discarded reads in an additional SAM file.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
47
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
48 -----
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
49
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
50 .. class:: warningmark
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
51
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
52 **Note**
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
53
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
54 Mapped reads without an MD tag will be removed from the output SAM file(s).
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
55
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
56 -----
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
57
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
58 .. class:: infomark
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
59
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
60 **About formats**
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
61
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
62 **SAM format** -- SAM stands for Sequence Alignment/Map format. It is a TAB-delimited text format consisting of a header section, which is optional, and an alignment section. Each alignment line has 11 mandatory fields for essential alignment information such as mapping position, and a variable number of optional fields for flexible or aligner-specific information.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
63
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
64 Each alignment line has 11 **mandatory** fields::
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
65
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
66 1. QNAME - Query template NAME
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
67 2. FLAG - bitwise FLAG
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
68 3. RNAME - Reference sequence NAME
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
69 4. POS - 1-based leftmost mapping POSition
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
70 5. MAPQ - MAPping Quality
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
71 6. CIGAR - CIGAR string
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
72 7. RNEXT - Ref. name of the mate/next segment
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
73 8. PNEXT - Position of the mate/next segment
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
74 9. TLEN - observed Template LENgth
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
75 10. SEQ - segment SEQuence
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
76 11. QUAL - ASCII of Phred-scaled base QUALity+33
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
77
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
78
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
79 All **optional** fields follow the TAG\:TYPE\:VALUE format, where TAG is a two-character string that matches [A-Za-z][A-Za-z0-9]. TYPE is a single case sensitive letter which defines the format of VALUE::
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
80
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
81 MD TAG
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
82
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
83 MD:Z:[0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)* with Z = Printable string, including space.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
84
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
85 String for mismatching positions. The MD field aims to achieve SNP/indel calling without looking at the reference.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
86 For example, a string ‘10A5^AC6’ means from the leftmost reference base in the alignment, there are 10 matches followed by an A on the reference which is different from the aligned read base; the next 5 reference bases are matches followed by a 2bp deletion from the reference; the deleted sequence is AC; the last 6 bases are matches. The MD field ought to match the CIGAR string.
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
87
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
88 -----
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
89
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
90 **Example**
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
91
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
92 - For the following dataset::
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
93
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
94 SRR057527.13746413 16 1 1164232 35 1I35M * 0 0 CGAAAGTGAGGTCCTGGCTCCAATCCAATCCCCGGG 333333033333333333333333333333333333 X0:i:1 X1:i:0 OC:Z:36M RG:Z:rnaseq XG:i:0 NM:i:2 XM:i:2 XO:i:0 OP:i:1164231 OQ:Z:CCCCCCDCCCCBCCCCCCCCCCCCCCCCCCCCCCCC XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
95 SRR057527.8574994 16 1 565901 23 36M * 0 0 GAGCCTAATCTACTCCACCTCAATCACACTACTCCC 333333333333333303333333333333333333 X0:i:1 X1:i:1 XA:Z:MT,-5351,36M,2; MD:Z:1C34 RG:Z:rnaseq XG:i:0 NM:i:1 XM:i:1 XO:i:0 OQ:Z:CCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCC XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
96 SRR057528.178504 0 1 566573 23 36M * 0 0 ACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCA 233333323222222232333222222222222222 X0:i:1 X1:i:1 XA:Z:MT,+6023,36M,1; MD:Z:36 RG:Z:rnaseq XG:i:0 NM:i:0 XM:i:0 XO:i:0 OQ:Z::?CCCCBAB@AA@@A@B@??BA@A;AA@======:@ XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
97 SRR057527.20391474 0 1 565512 23 36M * 0 0 GGCAGTTGAGGGGGATTAAACCAAACCCAACTACGC %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% X0:i:1 X1:i:1 XA:Z:MT,+4962,36M,2; MD:Z:11T24 RG:Z:rnaseq XG:i:0 NM:i:1 XM:i:1 XO:i:0 OQ:Z:%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
98 SRR057513.2261668 16 1 16267 15 36M * 0 0 CACTTCTGGATGCTAGGGTTACACTGGGAGTCACAG 333333333333333333333333333333333333 X0:i:1 X1:i:6 MD:Z:30A5 RG:Z:rnaseq XG:i:0 NM:i:1 XM:i:1 XO:i:0 OQ:Z:IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
99
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
100
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
101
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
102 - running this tool with **n = 5** and **m = 10** will return::
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
103
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
104 SRR057528.178504 0 1 566573 23 36M * 0 0 ACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCA 233333323222222232333222222222222222 X0:i:1 X1:i:1 XA:Z:MT,+6023,36M,1; MD:Z:36 RG:Z:rnaseq XG:i:0 NM:i:0 XM:i:0 XO:i:0 OQ:Z::?CCCCBAB@AA@@A@B@??BA@A;AA@======:@ XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
105 SRR057527.20391474 0 1 565512 23 36M * 0 0 GGCAGTTGAGGGGGATTAAACCAAACCCAACTACGC %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% X0:i:1 X1:i:1 XA:Z:MT,+4962,36M,2; MD:Z:11T24 RG:Z:rnaseq XG:i:0 NM:i:1 XM:i:1 XO:i:0 OQ:Z:%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
106 SRR057513.2261668 16 1 16267 15 36M * 0 0 CACTTCTGGATGCTAGGGTTACACTGGGAGTCACAG 333333333333333333333333333333333333 X0:i:1 X1:i:6 MD:Z:30A5 RG:Z:rnaseq XG:i:0 NM:i:1 XM:i:1 XO:i:0 OQ:Z:IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
107
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
108
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
109 </help>
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
110
447b7748eb83 Uploaded tool config xml
boris
parents:
diff changeset
111 </tool>