view tools/indels/indel_sam2interval.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="indel_sam2interval" name="Extract indels" version="1.0.0">
  <description>from SAM</description>
  <!--
    Command line (template with #if/#else/#end if directives).
    Dataset paths are single-quoted so that a path containing spaces or shell
    metacharacters reaches the script as a single argument.
    When a BED output was not requested, the literal string None is passed in
    place of a file path.
  -->
  <command interpreter="python">
    indel_sam2interval.py
      --input='$input1'
      --include_base=$include_base
      --collapse=$collapse
      --int_out='$output1'
      #if $ins_out.include_ins_out == "true"
        --bed_ins_out='$output2'
      #else
        --bed_ins_out="None"
      #end if
      #if $del_out.include_del_out == "true"
        --bed_del_out='$output3'
      #else
        --bed_del_out="None"
      #end if
  </command>
  <!--
    User-facing parameters: the SAM dataset to convert, two boolean switches
    (both checked by default), and two Yes/No selects that decide whether the
    insertions and deletions BED files are requested.
  -->
  <inputs>
    <param name="input1" type="data" format="sam"
           label="Select dataset to convert"/>

    <param name="include_base" type="boolean"
           checked="true" truevalue="true" falsevalue="false"
           label="Include the relevant base(s) for each insertion (and a dash (-) for deletions)"/>

    <param name="collapse" type="boolean"
           checked="true" truevalue="true" falsevalue="false"
           label="Collapse repeated locations onto single line with counts"/>

    <!-- Insertions BED switch; neither choice exposes further parameters. -->
    <conditional name="ins_out">
      <param name="include_ins_out" type="select"
             label="Include insertions output bed file?">
        <option value="true">Yes</option>
        <option value="false">No</option>
      </param>
      <when value="true"/>
      <when value="false"/>
    </conditional>

    <!-- Deletions BED switch; neither choice exposes further parameters. -->
    <conditional name="del_out">
      <param name="include_del_out" type="select"
             label="Include deletions output bed file?">
        <option value="true">Yes</option>
        <option value="false">No</option>
      </param>
      <when value="true"/>
      <when value="false"/>
    </conditional>
  </inputs>
  <!--
    Outputs: the interval dataset is always declared; each BED dataset is
    declared only when the matching select parameter is "true".
    The BED datasets carry explicit labels so the insertions and deletions
    files can be told apart in the history.
  -->
  <outputs>
    <data format="interval" name="output1" />
    <data format="bed" name="output2" label="${tool.name} on ${on_string}: insertions">
      <filter>ins_out[ "include_ins_out" ] == "true"</filter>
    </data>
    <data format="bed" name="output3" label="${tool.name} on ${on_string}: deletions">
      <filter>del_out[ "include_del_out" ] == "true"</filter>
    </data>
  </outputs>
  <!--
    Functional test: converts the sample SAM file with every option switched
    on and compares the interval file and both BED files with the expected
    outputs.
  -->
  <tests>
    <test>
      <!-- inputs -->
      <param name="input1" ftype="sam" value="indel_sam2interval_in1.sam"/>
      <param name="include_base" value="true"/>
      <param name="collapse" value="true"/>
      <param name="include_ins_out" value="true"/>
      <param name="include_del_out" value="true"/>
      <!-- expected results -->
      <output name="output1" ftype="interval" file="indel_sam2interval_out1.interval"/>
      <output name="output2" ftype="bed" file="indel_sam2interval_out2.bed"/>
      <output name="output3" ftype="bed" file="indel_sam2interval_out3.bed"/>
    </test>
  </tests>
  <help>

**What it does**

Given a SAM file containing indels, converts them to an interval file with a column indicating whether each one is an insertion or a deletion, and can optionally create a BED file for each type (one for insertions, one for deletions). The interval file can be combined with other similar files to create a table useful for analysis with the Indel Analysis Table tool. The BED files can be useful for visualizing the reads.

-----

**Example**

Suppose you have the following mapping results::

 r327     16   chrM   11   37      8M1D10M   *   0   0             CTTACCAGATAGTCATCA   -+&lt;2;?@BA@?-,.+4=4             XT:A:U  NM:i:1  X0:i:1  X1:i:0  XM:i:0  XO:i:1  XG:i:1  MD:Z:41^C35
 r457      0   chr1   14   37          14M   *   0   0                 ACCTGACAGATATC   =/DF;?@1A@?-,.                 XT:A:U  NM:i:0  X0:i:1  X1:i:0  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r501     16   chrM    6   23      7M1I13M   *   0   0          TCTGTGCCTACCAGACATTCA   +=$2;?@BA@?-,.+4=4=4A          XT:A:U  NM:i:3  X0:i:1  X1:i:1  XM:i:2  XO:i:1  XG:i:1  MD:Z:28C36G9        XA:Z:chrM,+134263658,14M1I61M,4;
 r1288    16   chrM    8   37      11M1I7M   *   0   0            TCACTTACCTGTACACACA   /*F2;?@%A@?-,.+4=4=            XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T0T1A69
 r1902     0   chr1    4   37      7M2D18M   *   0   0        AGTCTCTTACCTGACGGTTATGA   &lt;2;?@BA@?-,.+4=4=4AA663        XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 r2204    16   chrM    9    0          19M   *   0   0            CTGGTACCTGACAGGTATC   2;?@BA@?-,.+4=4=4AA            XT:A:R  NM:i:1  X0:i:2  X1:i:0  XM:i:1  XO:i:0  XG:i:0  MD:Z:0T75           XA:Z:chrM,-564927,76M,1;
 r2314    16   chrM    6   37      10M2D8M   *   0   0               TCACTCTTACGTCTGA   &lt;2;?@BA@?-,.+4=4               XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:25A5^CA45
 r3001     0   chrM   13   37   3M1D5M2I7M   *   0   0              TACAGTCACCCTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 r3218     0   chr1   13   37       8M1D7M   *   0   0                TACAGTCACTCATCA   &lt;2;?@BA/(@?-,$&amp;                XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:1  XO:i:1  XG:i:2  MD:Z:17^CA58A0
 r4767    16   chr2    3   37      15M2I7M   *   0   0       CAGACTCTCTTACCAAAGACAGAC   &lt;2;?@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:2T1A4T65
 r5333     0   chrM    5   37      17M1D8M   *   0   0       GTCTCTCATACCAGACAACGGCAT   FB3$@BA/(@?-,.+4=4=4AA66       XT:A:U  NM:i:4  X0:i:1  X1:i:0  XM:i:3  XO:i:1  XG:i:1  MD:Z:45C10^C0C5C13
 r6690    16   chrM    7   23          20M   *   0   0           CTCTCTTACCAGACAGACAT   2;?@BA/(@?-,.+4=4=4A           XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76             XA:Z:chrM,-568532,76M,1;
 r7211     0   chrM    7   37          24M   *   0   0       CGACAGAGACAAAATAACATTTAA   //&lt;2;?@BA@?-,.+4=442;;6:       XT:A:U  NM:i:3  X0:i:1  X1:i:0  XM:i:2  XO:i:1  XG:i:1  MD:Z:73G0G0
 r7899    69      *    0    0            *   *   0   0       CTGCGTGTTGGTGTCTACTGGGGT   #%#'##$#$##&amp;%#%$$$%#%#'#
 r9192   133      *    0    0            *   *   0   0       GTGCGTCGGGGAGGGTGCTGTCGG   ######%#$%#$$###($###&amp;&amp;%
 r9922    16   chrM    4    0       7M3I9M   *   0   0            CCAGACATTTGAAATCAGG   F/D4=44^D++26632;;6            XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r9987    16   chrM    4    0      9M1I18M   *   0   0   AGGTTCTCATTACCTGACACTCATCTTG   G/AD6"/+4=4426632;;6:&lt;2;?@BA   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r10145   16   chr1   16    0       5M2D7M   *   0   0                   CACATTGTTGTA   G//+4=44=4AA                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r10324   16   chrM   15    0       6M1D5M   *   0   0                   CCGTTCTACTTG   A@??8.G//+4=                   XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r12331   16   chrM   17    0       4M2I6M   *   0   0                  AGTCGAATACGTG   632;;6:&lt;2;?@B                  XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r12914   16   chr2   24    0       4M3I3M   *   0   0                     ACTACCCCAA   G//+4=42,.                     XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76
 r13452   16   chrM   13    0      3M1D11M   *   0   0                 TACGTCACTCATCA   IIIABCCCICCCCI                 XT:A:U  NM:i:0  X0:i:1  X1:i:1  XM:i:0  XO:i:0  XG:i:0  MD:Z:76


The following three files will be produced (shown in this order: interval, insertions BED and deletions BED)::

 chr1   11   13   D     -   1
 chr1   21   22   D     -   1
 chr1   21   23   D     -   1
 chr2   18   19   I    AA   1
 chr2   28   29   I   CCC   1
 chrM   11   12   I   TTT   1
 chrM   13   14   I     C   1
 chrM   13   14   I     T   1
 chrM   16   17   D     -   1
 chrM   16   18   D     -   1
 chrM   19   20   D     -   1
 chrM   19   20   I     T   1
 chrM   21   22   D     -   1
 chrM   21   22   I    GA   1
 chrM   22   23   D     -   1

 chr2   18   19
 chr2   28   29
 chrM   11   12
 chrM   13   14
 chrM   13   14
 chrM   19   20
 chrM   21   22

 chr1   11   13
 chr1   21   22
 chr1   21   23
 chrM   16   17
 chrM   16   18
 chrM   19   20
 chrM   21   22
 chrM   22   23

For more information on SAM, please consult the `SAM format description`__.

.. __: http://www.ncbi.nlm.nih.gov/pubmed/19505943


  </help>
</tool>