comparison tools/indels/indel_sam2interval.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 <tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
2 <description>from SAM</description>
3 <command interpreter="python">
4 indel_sam2interval.py
5 --input='$input1'
6 --include_base=$include_base
7 --collapse=$collapse
8 --int_out='$output1'
9 #if $ins_out.include_ins_out == "true"
10 --bed_ins_out='$output2'
11 #else
12 --bed_ins_out="None"
13 #end if
14 #if $del_out.include_del_out == "true"
15 --bed_del_out='$output3'
16 #else
17 --bed_del_out="None"
18 #end if
19 </command> <!-- Runs indel_sam2interval.py. Dataset paths are single-quoted so a path containing spaces or shell metacharacters reaches the script as a single argument; a disabled BED output is passed as the literal string "None". -->
20 <inputs> <!-- User-facing parameters; each one is referenced by name in the command template. -->
21 <param format="sam" name="input1" type="data" label="Select dataset to convert" /> <!-- passed to the script as --input -->
22 <param name="include_base" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)" /> <!-- passed as --include_base (true/false) -->
23 <param name="collapse" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Collapse repeated locations onto single line with counts" /> <!-- passed as --collapse (true/false) -->
24 <conditional name="ins_out"> <!-- controls whether output2 is created and passed as --bed_ins_out -->
25 <param name="include_ins_out" type="select" label="Include insertions output bed file?">
26 <option value="true">Yes</option>
27 <option value="false">No</option>
28 </param>
29 <when value="true" />
30 <when value="false" />
31 </conditional>
32 <conditional name="del_out"> <!-- controls whether output3 is created and passed as --bed_del_out -->
33 <param name="include_del_out" type="select" label="Include deletions output bed file?">
34 <option value="true">Yes</option>
35 <option value="false">No</option>
36 </param>
37 <when value="true" />
38 <when value="false" />
39 </conditional>
40 </inputs>
41 <outputs> <!-- One unconditional interval dataset plus two BED datasets that exist only when their filter is true. -->
42 <data format="interval" name="output1" /> <!-- passed to the script as --int_out -->
43 <data format="bed" name="output2"> <!-- passed as --bed_ins_out when the filter below is true -->
44 <filter>ins_out[ "include_ins_out" ] == "true"</filter>
45 </data>
46 <data format="bed" name="output3"> <!-- passed as --bed_del_out when the filter below is true -->
47 <filter>del_out[ "include_del_out" ] == "true"</filter>
48 </data>
49 </outputs>
50 <tests> <!-- A single test case that enables both optional BED outputs and compares all three outputs with the named expected files. -->
51 <test>
52 <param name="input1" value="indel_sam2interval_in1.sam" ftype="sam"/>
53 <param name="include_base" value="true"/>
54 <param name="collapse" value="true"/>
55 <param name="include_ins_out" value="true" /> <!-- selects the "true" case of the ins_out conditional -->
56 <param name="include_del_out" value="true" /> <!-- selects the "true" case of the del_out conditional -->
57 <output name="output1" file="indel_sam2interval_out1.interval" ftype="interval"/>
58 <output name="output2" file="indel_sam2interval_out2.bed" ftype="bed"/>
59 <output name="output3" file="indel_sam2interval_out3.bed" ftype="bed"/>
60 </test>
61 </tests>
62 <help>
63
64 **What it does**
65
66 Given a SAM file containing indels, converts them to an interval file with a column indicating whether each indel is an insertion or a deletion, and can optionally create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other interval files of the same kind to create a table useful for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.
67
68 -----
69
70 **Example**
71
72 Suppose you have the following mapping results::
73
74 r327 16 chrM 11 37 8M1D10M * 0 0 CTTACCAGATAGTCATCA -+&lt;2;?@BA@?-,.+4=4 XT:A:U NM:i:1 X0:i:1 X1:i:0 XM:i:0 XO:i:1 XG:i:1 MD:Z:41^C35
75 r457 0 chr1 14 37 14M * 0 0 ACCTGACAGATATC =/DF;?@1A@?-,. XT:A:U NM:i:0 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
76 r501 16 chrM 6 23 7M1I13M * 0 0 TCTGTGCCTACCAGACATTCA +=$2;?@BA@?-,.+4=4=4A XT:A:U NM:i:3 X0:i:1 X1:i:1 XM:i:2 XO:i:1 XG:i:1 MD:Z:28C36G9 XA:Z:chrM,+134263658,14M1I61M,4;
77 r1288 16 chrM 8 37 11M1I7M * 0 0 TCACTTACCTGTACACACA /*F2;?@%A@?-,.+4=4= XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T0T1A69
78 r1902 0 chr1 4 37 7M2D18M * 0 0 AGTCTCTTACCTGACGGTTATGA &lt;2;?@BA@?-,.+4=4=4AA663 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
79 r2204 16 chrM 9 0 19M * 0 0 CTGGTACCTGACAGGTATC 2;?@BA@?-,.+4=4=4AA XT:A:R NM:i:1 X0:i:2 X1:i:0 XM:i:1 XO:i:0 XG:i:0 MD:Z:0T75 XA:Z:chrM,-564927,76M,1;
80 r2314 16 chrM 6 37 10M2D8M * 0 0 TCACTCTTACGTCTGA &lt;2;?@BA@?-,.+4=4 XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:25A5^CA45
81 r3001 0 chrM 13 37 3M1D5M2I7M * 0 0 TACAGTCACCCTCATCA &lt;2;?@BA/(@?-,$&amp; XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
82 r3218 0 chr1 13 37 8M1D7M * 0 0 TACAGTCACTCATCA &lt;2;?@BA/(@?-,$&amp; XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:1 XO:i:1 XG:i:2 MD:Z:17^CA58A0
83 r4767 16 chr2 3 37 15M2I7M * 0 0 CAGACTCTCTTACCAAAGACAGAC &lt;2;?@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:2T1A4T65
84 r5333 0 chrM 5 37 17M1D8M * 0 0 GTCTCTCATACCAGACAACGGCAT FB3$@BA/(@?-,.+4=4=4AA66 XT:A:U NM:i:4 X0:i:1 X1:i:0 XM:i:3 XO:i:1 XG:i:1 MD:Z:45C10^C0C5C13
85 r6690 16 chrM 7 23 20M * 0 0 CTCTCTTACCAGACAGACAT 2;?@BA/(@?-,.+4=4=4A XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76 XA:Z:chrM,-568532,76M,1;
86 r7211 0 chrM 7 37 24M * 0 0 CGACAGAGACAAAATAACATTTAA //&lt;2;?@BA@?-,.+4=442;;6: XT:A:U NM:i:3 X0:i:1 X1:i:0 XM:i:2 XO:i:1 XG:i:1 MD:Z:73G0G0
87 r7899 69 * 0 0 * * 0 0 CTGCGTGTTGGTGTCTACTGGGGT #%#'##$#$##&amp;%#%$$$%#%#'#
88 r9192 133 * 0 0 * * 0 0 GTGCGTCGGGGAGGGTGCTGTCGG ######%#$%#$$###($###&amp;&amp;%
89 r9922 16 chrM 4 0 7M3I9M * 0 0 CCAGACATTTGAAATCAGG F/D4=44^D++26632;;6 XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
90 r9987 16 chrM 4 0 9M1I18M * 0 0 AGGTTCTCATTACCTGACACTCATCTTG G/AD6"/+4=4426632;;6:&lt;2;?@BA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
91 r10145 16 chr1 16 0 5M2D7M * 0 0 CACATTGTTGTA G//+4=44=4AA XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
92 r10324 16 chrM 15 0 6M1D5M * 0 0 CCGTTCTACTTG A@??8.G//+4= XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
93 r12331 16 chrM 17 0 4M2I6M * 0 0 AGTCGAATACGTG 632;;6:&lt;2;?@B XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
94 r12914 16 chr2 24 0 4M3I3M * 0 0 ACTACCCCAA G//+4=42,. XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
95 r13452 16 chrM 13 0 3M1D11M * 0 0 TACGTCACTCATCA IIIABCCCICCCCI XT:A:U NM:i:0 X0:i:1 X1:i:1 XM:i:0 XO:i:0 XG:i:0 MD:Z:76
96
97
98 The following three files will be produced (Interval, Insertions BED and Deletions BED)::
99
100 chr1 11 13 D - 1
101 chr1 21 22 D - 1
102 chr1 21 23 D - 1
103 chr2 18 19 I AA 1
104 chr2 28 29 I CCC 1
105 chrM 11 12 I TTT 1
106 chrM 13 14 I C 1
107 chrM 13 14 I T 1
108 chrM 16 17 D - 1
109 chrM 16 18 D - 1
110 chrM 19 20 D - 1
111 chrM 19 20 I T 1
112 chrM 21 22 D - 1
113 chrM 21 22 I GA 1
114 chrM 22 23 D - 1
115
116 chr2 18 19
117 chr2 28 29
118 chrM 11 12
119 chrM 13 14
120 chrM 13 14
121 chrM 19 20
122 chrM 21 22
123
124 chr1 11 13
125 chr1 21 22
126 chr1 21 23
127 chrM 16 17
128 chrM 16 18
129 chrM 19 20
130 chrM 21 22
131 chrM 22 23
132
133 For more information on SAM, please consult the `SAM format description`__.
134
135 .. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943
136
137
138 </help>
139 </tool>