diff sam_pileup.xml @ 0:95612c159681

Uploaded tool tarball.
author devteam
date Mon, 26 Aug 2013 14:21:12 -0400
parents
children e7d863c5c5d6
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_pileup.xml	Mon Aug 26 14:21:12 2013 -0400
@@ -0,0 +1,190 @@
+<tool id="sam_pileup" name="Generate pileup" version="1.1.1">
+  <description>from BAM dataset</description>
+  <requirements>
+    <requirement type="package" version="0.1.16">samtools</requirement>
+  </requirements>
+  <command interpreter="python">
+    sam_pileup.py
+      --input1=$input1
+      --output=$output1
+      --ref=$refOrHistory.reference
+      #if $refOrHistory.reference == "history":
+        --ownFile=$refOrHistory.ownFile
+      #else:
+        --ownFile="None"
+      #end if
+       --dbkey=${input1.metadata.dbkey}
+       --indexDir=${GALAXY_DATA_INDEX_DIR}
+       --bamIndex=${input1.metadata.bam_index}
+       --lastCol=$lastCol
+       --indels=$indels
+       --mapCap=$mapCap
+       --consensus=$c.consensus
+      #if $c.consensus == "yes":
+        --theta=$c.theta
+        --hapNum=$c.hapNum
+        --fraction=$c.fraction
+        --phredProb=$c.phredProb
+       #else:
+        --theta="None"
+        --hapNum="None"
+        --fraction="None"
+        --phredProb="None"
+      #end if
+  </command>
+  <inputs>
+    <conditional name="refOrHistory">
+      <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
+        <option value="indexed">Use a built-in index</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="indexed">
+        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
+           <validator type="unspecified_build" />
+           <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" />
+        </param>
+      </when>
+      <when value="history">
+        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
+        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
+      </when>
+    </conditional>
+    <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
+      <option value="no">Do not print the mapping quality as the last column</option>
+      <option value="yes">Print the mapping quality as the last column</option>
+    </param>
+    <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels">
+      <option value="no">Print all lines</option>
+      <option value="yes">Print only lines containing indels</option>
+    </param>
+    <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
+    <conditional name="c">
+      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
+        <option selected="true" value="no">No</option>
+        <option value="yes">Yes</option>
+      </param> 
+      <when value="no" />
+      <when value="yes">
+        <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
+        <param name="hapNum" type="integer" value="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
+        <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
+        <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
+      </when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output1" label="${tool.name} on ${on_string}: converted pileup" />
+  </outputs>
+  <tests>
+    <test>
+      <!--
+      Bam to pileup command:
+      samtools faidx chr_m.fasta
+      samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
+      chr_m.fasta is the prefix of the index
+      -->
+      <param name="reference" value="history" />
+      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
+      <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
+      <param name="lastCol" value="no" />
+      <param name="indels" value="no" />
+      <param name="mapCap" value="60" />
+      <param name="consensus" value="no" />
+      <output name="output1" file="sam_pileup_out1.pileup" />
+    </test>
+    <test>
+      <!--
+      Bam to pileup command:
+      samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
+      chr_m.fasta is the prefix of the index
+      -->
+      <param name="reference" value="indexed" />
+      <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" />
+      <param name="lastCol" value="no" />
+      <param name="indels" value="no" />
+      <param name="mapCap" value="60" />
+      <param name="consensus" value="yes" />
+      <param name="theta" value="0.85" />
+      <param name="hapNum" value="2" />
+      <param name="fraction" value="0.001" />
+      <param name="phredProb" value="40" />
+      <output name="output1" file="sam_pileup_out2.pileup" />
+    </test>
+  </tests>
+  <help>
+
+**What it does**
+
+Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If *Call consensus according to MAQ model?* option is set to **No**, the tool produces simple pileup. If the option is set to **Yes**, a ten column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+------
+
+**Types of pileup datasets**
+
+The description of pileup format below is largely based on information that can be found on SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.
+
+.. _Pileup: http://samtools.sourceforge.net/pileup.shtml
+
+**Six column pileup**::
+
+    1    2  3  4        5        6
+ ---------------------------------
+ chrM  412  A  2       .,       II
+ chrM  413  G  4     ..t,     IIIH
+ chrM  414  C  4     ...a     III2
+ chrM  415  C  4     TTTt     III7
+   
+where::
+
+  Column Definition
+ ------- ----------------------------
+       1 Chromosome
+       2 Position (1-based)
+       3 Reference base at that position
+       4 Coverage (# reads aligning over that position)
+       5 Bases within reads where (see Galaxy wiki for more info)
+       6 Quality values (phred33 scale, see Galaxy wiki for more)
+       
+**Ten column pileup**
+
+The `ten-column` (consensus_) pileup incorporates additional consensus information generated with *-c* option of *samtools pileup* command::
+
+
+    1    2  3  4   5   6   7   8       9       10
+ ------------------------------------------------
+ chrM  412  A  A  75   0  25  2       .,       II
+ chrM  413  G  G  72   0  25  4     ..t,     IIIH
+ chrM  414  C  C  75   0  25  4     ...a     III2
+ chrM  415  C  T  75  75  25  4     TTTt     III7
+
+where::
+
+  Column Definition
+ ------- --------------------------------------------------------
+       1 Chromosome
+       2 Position (1-based)
+       3 Reference base at that position
+       4 Consensus bases
+       5 Consensus quality
+       6 SNP quality
+       7 Maximum mapping quality
+       8 Coverage (# reads aligning over that position)
+       9 Bases within reads where (see Galaxy wiki for more info)
+      10 Quality values (phred33 scale, see Galaxy wiki for more)
+
+
+.. _consensus: http://samtools.sourceforge.net/cns0.shtml
+
+------
+
+**Citation**
+
+For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_
+
+
+  </help>
+</tool>
+
+