view CoverageReport.xml @ 13:a030e3cd3da5 draft

Uploaded
author geert-vandeweyer
date Thu, 20 Feb 2014 08:57:52 -0500
parents 86df3f847a72
children a24c8e81cee0
line wrap: on
line source

<tool id="CoverageReport2" name="Panel Coverage Report" version="0.0.2">
  <description></description>
  
  <command interpreter="perl">
    CoverageReport.pl
      ## NOTE: the script name must stay the first token of this template;
      ## interpreter="perl" makes Galaxy resolve that token against the tool
      ## directory. Do not move any directive or comment above it.

      ## input files (single-quoted so the shell passes each path verbatim)
      -b '$input1'
      -t '$input2'

      ## output files
      -o '$output1'
      -z '$output2'

      ## run parameters
      ## The two select values below are either a bare flag or an empty
      ## string, so they are deliberately left unquoted: quoting an empty
      ## value would hand the script a spurious empty argument.
      $perGene
      $PositionLevel
      -m '$threshold'
      -f '$frac'

      ## sample name: resolve it from the source chosen in the
      ## "namefromselect" conditional.
      #if $namefromselect.namesource == "typed":
          #set $sample_name = str($namefromselect.typedname)
      #elif $namefromselect.namesource == "other":
          #set $sample_name = str($namefromselect.namefile.display_name)
      #elif $namefromselect.namesource == "bam":
          #set $sample_name = str($input1.display_name)
      #else:
          ## Fallback kept from the original template; not reachable with
          ## the three options currently offered by the select.
          #set $sample_name = "Unspecified"
      #end if
      ## Dataset display names are free text. Inside double quotes the shell
      ## would still expand \$(...), backticks and variables, so the name is
      ## passed in single quotes with embedded single quotes escaped.
      #set $sample_name = $sample_name.replace("'", "'\\''")
      -n '$sample_name'
  </command>
  <requirements>
    <!-- Versioned packages the wrapper declares as dependencies. -->
    <requirement version="3.0.2" type="package">R</requirement>
    <requirement version="2.18.2" type="package">bedtools</requirement>
    <requirement version="0.1.18" type="package">samtools</requirement>
    <!-- Declared as a plain binary requirement (no package/version). -->
    <requirement type="binary">pdflatex</requirement>
  </requirements>
  <inputs>
    <!-- Datasets handed to CoverageReport.pl as -b (reads) and -t (targets). -->
    <param name="input1" type="data" format="bam" label="BAM file" help="BAM file of mapped reads" />
    <param name="input2" type="data" format="bed" label="Target Regions BED" help="BED file containing regions of interest. See below for format" />

    <!-- Numeric run parameters, passed as -m and -f respectively. -->
    <param name="threshold" type="integer" value="40" label="Minimal Coverage Threshold" help="Default: 40" />
    <param name="frac" type="float" value="0.2" label="Fraction of Average Coverage for usage in plot" help="Default: 0.2" />

    <!-- Each option value below is the literal command-line flag inserted
         into the command template; an empty value adds no flag. -->
    <param name="perGene" type="select" label="Plot exon coverages for all genes in targets">
      <option value="-r">Yes</option>
      <option value="">No</option>
    </param>
    <param name="PositionLevel" type="select" label="Perform Per Exon Analysis" help="Only Failed: Only those exons not reaching global coverage above threshold, or 100%. All Exons: This can take a very long time for large panels! Select all failed to check all exons for local failures." >
      <option value="" selected="true">None</option>
      <option value="-s">Plot Only Globally Failed</option>
      <option value="-S">Plot All Failed Exons</option>
      <option value="-A">Plot All Exons</option>
      <option value="-L">List All Failed Exons</option>
    </param>

    <!-- Where the sample name given to the script (-n) comes from. -->
    <conditional name="namefromselect">
      <param name="namesource" type="select" label="Type the name of the sample or take the name of an input file?">
        <option value="typed">Type the samplename</option>
        <option value="bam">Use the BAM File name</option>
        <option value="other">Select a file to base the name on</option>
      </param>
      <when value="typed">
        <param name="typedname" type="text" size="25" label="Sample Name for Report." />
      </when>
      <!-- The BAM name is read from input1, so this case needs no extra
           parameters; it is declared so every select option has a matching
           when-block. -->
      <when value="bam" />
      <when value="other">
        <!-- "txt" is Galaxy's plain-text datatype extension; the former
             "text" entry did not name a datatype. -->
        <param name="namefile" type="data" format="sam,bam,fastq,fasta,bed,fastqsanger,fastqillumina,txt" label="Select a file from the history to base the sample name upon" />
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- output1 is passed to the script as -o, output2 as -z. -->
    <data name="output1" format="pdf" label="${tool.name} on ${on_string}: PDF Report" />
    <data name="output2" format="tar.gz" label="${tool.name} on ${on_string}: Plots And Tables" />
  </outputs>
  <help>

**What it does**

This tool creates a coverage report for QC purposes. By default, average coverage statistics are provided, taken from samtools flagstat. If specified, it can also create overviews per gene in the BED file, and sub-exon plots for failed exons.

------

**BED format**

The BED file containing targets of interest has very specific format requirements. You **must** use the following format::

  Column 1: Chromosome : Use the same syntax as the references used by Galaxy. Check your sam-headers for the correct format. ('chr1' vs '1')
  Column 2: Start Position
  Column 3: End Position
  Column 4: Target Name. Use : "GENE-NAME&lt;space&gt;Exon_number" : This is split on the space after 'GeneName' for correct grouping.
  Column 5: Score : ignored, use '0'
  Column 6: Strand: ignored,'+' or '-'

.. class:: infomark 

Note: The exons for the plots will be ordered in the same way as the exons in the BED file. 

------

**Input formats**

BAM file for reads, BED file for targets.

------

**Outputs**

The output files are a PDF report and a tar.gz file with all the plots and output tables. 
The output tables are (tab-separated txt files):

**Targets.Global.Coverage** : Original BED file + following columns::

  - Total coverage in target
  - Bases in target with coverage
  - Length of target
  - Percent of target covered

**Targets.Position.Coverage** : Original BED file + following columns::

  - Position in target region
  - Coverage at position

  </help>
</tool>