Mercurial > repos > pjbriggs > rnachipintegrator

<?xml version="1.0" encoding="utf-8"?>
<tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@-0">
  <!-- One-line summary of the tool -->
  <description>Integrated analysis of gene expression data and ChIP data</description>
  <!-- Shared definitions are pulled in from rnachipintegrator_macros.xml.
       NOTE(review): the "requirements" and "version_command" macros expanded
       below, and the @VERSION@ token used in the tool version attribute, are
       presumably defined in that file (it is not visible here); confirm. -->
  <macros>
    <import>rnachipintegrator_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <expand macro="version_command" />
  <command interpreter="bash">rnachipintegrator_wrapper.sh
  ## Cheetah template that assembles the command line for the wrapper script.
  ## Optional integer parameters are only passed when they are non-empty.
  ##
  ## Options that are only offered when the ChIP peaks are summits
  #if str( $analysis_options.peak_type ) == "summits"
    #if str( $analysis_options.window ) != ""
    --window=$analysis_options.window
    #end if
    #if str( $analysis_options.cutoff ) != ""
    --cutoff=$analysis_options.cutoff
    #end if
  #end if
  ## Options that are only offered when the ChIP peaks are regions
  #if str( $analysis_options.peak_type ) == "regions"
    #if str( $analysis_options.edge_cutoff ) != ""
    --edge-cutoff=$analysis_options.edge_cutoff
    #end if
    #if str( $analysis_options.number ) != ""
    --number=$analysis_options.number
    #end if
    ## The promoter region needs both limits, so both are tested explicitly
    #if str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end ) != ""
    --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end
    #end if
    #if $analysis_options.pad_output
    --pad
    #end if
  #end if
  ## Input datasets; paths are quoted so the shell always sees one argument each
  '$rnaseq' '$chipseq'
  ## The spreadsheet output is always requested
  --output_xls '$xls_output'
  ## Tab-delimited results go either into one zip archive or into the
  ## individual history items that match the selected peak type
  #if $results_as_zip
  --zip_file '$zip_file'
  #else
    #if str( $analysis_options.peak_type ) == "summits"
    --summit_outputs '$peaks_to_transcripts_out' '$tss_to_summits_out'
    #end if
    #if str( $analysis_options.peak_type ) == "regions"
    --peak_outputs '$transcripts_to_edges_out'
                   '$transcripts_to_edges_summary'
                   '$tss_to_edges_out'
                   '$tss_to_edges_summary'
    #end if
  #end if
  </command>
  <inputs>
    <!-- The two datasets to be integrated; both must be tab-delimited -->
    <param format="tabular" name="rnaseq" type="data" label="Gene expression data file" />
    <param format="tabular" name="chipseq" type="data" label="ChIP peaks data file" />
    <!-- User must specify if ChIP peaks are summits or regions; the choice
         selects which set of analysis options is shown below -->
    <conditional name="analysis_options">
      <param name="peak_type" type="select" label="ChIP peaks are"
             help="Options and outputs depend on whether ChIP data are summits or regions">
        <option value="summits">summits</option>
        <option value="regions">regions</option>
      </param>
      <!-- Options shown for summit data; each may be left empty -->
      <when value="summits">
        <param name="window" type="integer" value="20000" optional="true"
               label="Maximum distance a peak can be from each transcript TSS before being omitted from analysis" />
        <param name="cutoff" type="integer" value="130000" optional="true"
               label="Maximum distance a transcript TSS can be from each peak before being omitted from the analysis" />
      </when>
      <!-- Options shown for region data; the integer options may be left empty -->
      <when value="regions">
        <param name="edge_cutoff" type="integer" value="10000" optional="true"
               label="Maximum distance a transcript edge can be from the peak edge before being omitted from the analysis"
               help="Set to zero to indicate that no cut off should be applied" />
        <param name="number" type="integer" value="4" optional="true"
               label="Maximum number of transcripts per peak to report from the analysis" />
        <param name="promoter_start" type="integer" value="-10000" optional="true"
               label="Start of promoter region with respect to gene TSS" />
        <param name="promoter_end" type="integer" value="2500" optional="true"
               label="End of promoter region with respect to gene TSS" />
        <param name="pad_output" type="boolean" checked="false" truevalue="yes"
               label="Output same number of lines for each peak"
               help="Add blank lines in output for peaks with fewer than maximum number of hits (--pad)" />
      </when>
    </conditional>
    <!-- When checked, the tab-delimited results are delivered as one zip
         archive instead of as individual history items -->
    <param name="results_as_zip" type="boolean" checked="false" truevalue="yes"
           label="Put output tab-delimited files into a single zip archive" />
  </inputs>
  <outputs>
    <!-- Excel spreadsheet: has no filter, so it is produced on every run -->
    <data name="xls_output"
          format="xls"
          label="All RnaChipIntegrator analyses for ${rnaseq.name} vs ${chipseq.name} (Excel spreadsheet)"/>

    <!-- Summit analyses: kept only for summit data when not zipping -->
    <data name="peaks_to_transcripts_out"
          format="tabular"
          label="Nearest summits to transcripts for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "summits"</filter>
      <filter>results_as_zip is False</filter>
    </data>
    <data name="tss_to_summits_out"
          format="tabular"
          label="Nearest TSS to summits for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "summits"</filter>
      <filter>results_as_zip is False</filter>
    </data>

    <!-- Region analyses: kept only for region data when not zipping -->
    <data name="transcripts_to_edges_out"
          format="tabular"
          label="Nearest transcripts to peak edges for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "regions"</filter>
      <filter>results_as_zip is False</filter>
    </data>
    <data name="transcripts_to_edges_summary"
          format="tabular"
          label="Nearest transcripts to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "regions"</filter>
      <filter>results_as_zip is False</filter>
    </data>
    <data name="tss_to_edges_out"
          format="tabular"
          label="Nearest TSS to peak edges for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "regions"</filter>
      <filter>results_as_zip is False</filter>
    </data>
    <data name="tss_to_edges_summary"
          format="tabular"
          label="Nearest TSS to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}">
      <filter>analysis_options['peak_type'] == "regions"</filter>
      <filter>results_as_zip is False</filter>
    </data>

    <!-- Zip archive: kept only when the zip option is checked -->
    <data name="zip_file"
          format="zip"
          label="All tab-delimited files for ${rnaseq.name} vs ${chipseq.name} (zip file)">
      <filter>results_as_zip is True</filter>
    </data>
  </outputs>
  <tests>
    <!--
    **NB** in both tests the outputs have to be listed in the order that they
    appear in the tool (which is the order they will be written to the
    history): the test framework seems to use the order and to ignore the
    "name" attribute.
    -->
    <!-- Summit data, with both distance options given explicitly -->
    <test>
      <param name="rnaseq" ftype="tabular" value="ExpressionData.txt"/>
      <param name="chipseq" ftype="tabular" value="ChIP_summits.txt"/>
      <param name="peak_type" value="summits"/>
      <param name="window" value="20000"/>
      <param name="cutoff" value="130000"/>
      <output name="xls_output" compare="sim_size" file="summits.xls"/>
      <output name="peaks_to_transcripts_out" ftype="tabular" file="peaks_to_transcripts.out"/>
      <output name="tss_to_summits_out" ftype="tabular" file="tss_to_summits.out"/>
    </test>
    <!-- Region data, overriding only the edge cutoff -->
    <test>
      <param name="rnaseq" ftype="tabular" value="ExpressionData.txt"/>
      <param name="chipseq" ftype="tabular" value="ChIP_peaks.txt"/>
      <param name="peak_type" value="regions"/>
      <param name="edge_cutoff" value="130000"/>
      <output name="xls_output" compare="sim_size" file="peaks.xls"/>
      <output name="transcripts_to_edges_out" ftype="tabular" file="transcripts_to_edges.out"/>
      <output name="transcripts_to_edges_summary" ftype="tabular" file="transcripts_to_edges.summary"/>
      <output name="tss_to_edges_out" ftype="tabular" file="tss_to_edges.out"/>
      <output name="tss_to_edges_summary" ftype="tabular" file="tss_to_edges.summary"/>
    </test>
  </tests>
  <help>

.. class:: infomark

**What it does**

Run RnaChipIntegrator to perform integrated analyses of gene expression
and ChIP data, identifying the nearest ChIP peaks to each transcript
and vice versa.

For ChIP peaks defined as regions the following analyses are performed:

 * **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest
   distance from either their TSS or TES to the nearest peak edge.

 * **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance
   from their TSS to the nearest peak edge.

For ChIP peaks defined as summits:

 * **TSSToSummits**: reports the nearest transcripts with the smallest distance
   from the TSS to the nearest peak summit.

 * **PeaksToTranscripts**: reports the nearest peak summits with the smallest
   distance to either the TSS or TES of each transcript.

The program was originally written specifically for ChIP-Seq and RNA-Seq data
but works equally well for ChIP-chip and microarray expression data, and can
also be used to integrate any set of genomic features (e.g. canonical genes,
CpG islands) with expression data.

RnaChipIntegrator can be obtained from
http://fls-bioinformatics-core.github.com/RnaChipIntegrator/

-------------

.. class:: infomark

**Input**

The expression data must be in a tab-delimited file with the following columns
of data for each genomic feature (one feature per line):

====== ========== ======================================================================
Column Name       Description
====== ========== ======================================================================
     1 ID         Name used to identify the feature in the output
     2 chr        Chromosome name
     3 start      Start position of the feature
     4 end        End position of the feature
     5 strand     Must be either '+' or '-'
     6 diff_expr  Optional: indicates feature is differentially expressed (1) or not (0)
====== ========== ======================================================================

The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each
ChIP peak (one per line):

====== ========== ======================================================================
Column Name       Description
====== ========== ======================================================================
     1 chr        Chromosome name (must match one of those in expression data file)
     2 start      Start position of the peak
     3 end        End position of the peak (start + 1 for summit data)
====== ========== ======================================================================

The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the
entire extent of the binding region (with 'start' and 'end' indicating the limits).

-------------

.. class:: infomark

**Output**

The outputs from this tool vary depending on the type of input data. In general,
there is one tab-delimited results file for each analysis described above
in the **What it does** section (some analyses output a second file with just the
"best" hits).

A history item will be generated for each output file, unless the option to put them
into a single zip archive is selected; this archive file will have to be downloaded
and unzipped on your local machine. It is recommended that you refer to the
RnaChipIntegrator documentation for information on the contents of each output file:
https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown

In addition an Excel spreadsheet (with one page for each analysis performed) is always
produced.

-------------

.. class:: infomark

**Credits**

This Galaxy tool has been developed within the Bioinformatics Core Facility at the
University of Manchester. It runs the RnaChipIntegrator package which has also been
developed by this group, and is documented at
http://fls-bioinformatics-core.github.com/RnaChipIntegrator/

Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
  </help>
</tool>
author	pjbriggs
date	Tue, 30 Jun 2015 06:44:06 -0400
parents
children	5f69a2c1b9c9