Mercurial > repos > pjbriggs > rnachipintegrator

diff rnachipintegrator_wrapper.xml @ 0:d9c1f2133124 draft
Uploaded initial version 0.4.4.
author: pjbriggs
date: Tue, 30 Jun 2015 06:44:06 -0400
children: 5f69a2c1b9c9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rnachipintegrator_wrapper.xml	Tue Jun 30 06:44:06 2015 -0400
@@ -0,0 +1,262 @@
+<?xml version="1.0" encoding="utf-8"?>
+<tool id="rnachipintegrator_wrapper" name="RnaChipIntegrator" version="@VERSION@-0">
+  <description>Integrated analysis of gene expression data and ChIP data</description>
+  <macros>
+    <import>rnachipintegrator_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <expand macro="version_command" />
+  <command interpreter="bash">rnachipintegrator_wrapper.sh
+  #if str( $analysis_options.peak_type ) == "summits"
+    #if str( $analysis_options.window ) != ""
+    --window=$analysis_options.window
+    #end if
+    #if str( $analysis_options.cutoff ) != ""
+    --cutoff=$analysis_options.cutoff
+    #end if
+  #end if
+  #if str( $analysis_options.peak_type ) == "regions"
+    #if str( $analysis_options.edge_cutoff ) != ""
+    --edge-cutoff=$analysis_options.edge_cutoff
+    #end if
+    #if str( $analysis_options.number ) != ""
+    --number=$analysis_options.number
+    #end if
+    #if (str( $analysis_options.promoter_start ) != "" and str( $analysis_options.promoter_end ) != "")
+    --promoter_region=$analysis_options.promoter_start,$analysis_options.promoter_end
+    #end if
+    #if $analysis_options.pad_output
+    --pad
+    #end if
+  #end if
+  $rnaseq $chipseq
+  --output_xls $xls_output
+  #if $results_as_zip
+  --zip_file $zip_file
+  #else
+    #if str( $analysis_options.peak_type ) == "summits"
+    --summit_outputs $peaks_to_transcripts_out $tss_to_summits_out
+    #end if
+    #if str( $analysis_options.peak_type ) == "regions"
+    --peak_outputs $transcripts_to_edges_out
+                   $transcripts_to_edges_summary
+                   $tss_to_edges_out
+                   $tss_to_edges_summary
+    #end if
+  #end if
+  </command>
+  <inputs>
+    <param format="tabular" name="rnaseq" type="data" label="Gene expression data file" />
+    <param format="tabular" name="chipseq" type="data" label="ChIP peaks data file" />
+  <conditional name="analysis_options">
+    <!-- user must specify if ChIP peaks are summits or regions -->
+    <param name="peak_type" type="select" label="ChIP peaks are"
+	   help="Options and outputs depend on whether ChIP data are summits or regions">
+      <option value="summits">summits</option>
+      <option value="regions">regions</option>
+    </param>
+    <when value="summits">
+      <param name="window" type="integer" value="20000" optional="true"
+	     label="Maximum distance a peak can be from each transcript
+		    TSS before being omitted from analysis" />
+      <param name="cutoff" type="integer" value="130000" optional="true"
+	     label="Maximum distance a transcript TSS can be from each
+		    peak before being omitted from the analysis" />
+    </when>
+    <when value="regions">
+      <param name="edge_cutoff" type="integer" value="10000" optional="true"
+	     label="Maximum distance a transcript edge can be from the
+		    peak edge before being omitted from the analysis"
+	     help="Set to zero to indicate that no cut off should be applied" />
+      <param name="number" type="integer" value="4" optional="true"
+	     label="Maximum number of transcripts per peak to report
+		    from the analysis" />
+      <param name="promoter_start" type="integer" value="-10000" optional="true"
+	     label="Start of promoter region with respect to gene TSS" />
+      <param name="promoter_end" type="integer" value="2500" optional="true"
+	     label="End of promoter region with respect to gene TSS" />
+      <param name="pad_output" type="boolean" checked="false" truevalue="yes"
+	     label="Output same number of lines for each peak"
+	     help="Add blank lines in output for peaks with fewer than maximum number
+		   of hits (--pad)" />
+    </when>
+  </conditional>
+    <param name="results_as_zip" type="boolean" checked="false" truevalue="yes"
+           label="Put output tab-delimited files into a single zip archive" />
+  </inputs>
+  <outputs>
+    <!-- Always produce XLS output -->
+    <data format="xls" name="xls_output"
+	  label="All RnaChipIntegrator analyses for ${rnaseq.name} vs ${chipseq.name} (Excel spreadsheet)" />
+    <!-- Outputs only produced for summit data -->
+    <data format="tabular" name="peaks_to_transcripts_out"
+	  label="Nearest summits to transcripts for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "summits"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <data format="tabular" name="tss_to_summits_out"
+	  label="Nearest TSS to summits for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "summits"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <!-- Outputs only produced for region data -->
+    <data format="tabular" name="transcripts_to_edges_out"
+	  label="Nearest transcripts to peak edges for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "regions"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <data format="tabular" name="transcripts_to_edges_summary"
+	  label="Nearest transcripts to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "regions"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <data format="tabular" name="tss_to_edges_out"
+	  label="Nearest TSS to peak edges for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "regions"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <data format="tabular" name="tss_to_edges_summary"
+	  label="Nearest TSS to peak edges (summary) for ${rnaseq.name} vs ${chipseq.name}" >
+      <filter>analysis_options['peak_type'] == "regions"</filter>
+      <filter>results_as_zip is False</filter>
+    </data>
+    <data format="zip" name="zip_file"
+	  label="All tab-delimited files for ${rnaseq.name} vs ${chipseq.name} (zip file)" >
+      <filter>results_as_zip is True</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" />
+      <param name="chipseq" value="ChIP_summits.txt" ftype="tabular" />
+      <param name="peak_type" value="summits" />
+      <param name="window" value="20000" />
+      <param name="cutoff" value="130000" />
+      <!-- 
+      **NB** outputs have to be specified in order that they appear in the
+      tool (which is the order they will be written to the history) - the
+      test framework seems to use the order and ignores the "name" attribute
+      -->
+      <output name="xls_output" file="summits.xls" compare="sim_size" />
+      <output name="peaks_to_transcripts_out" file="peaks_to_transcripts.out" ftype="tabular" />
+      <output name="tss_to_summits_out" file="tss_to_summits.out" ftype="tabular" />
+    </test>
+    <test>
+      <param name="rnaseq" value="ExpressionData.txt" ftype="tabular" />
+      <param name="chipseq" value="ChIP_peaks.txt" ftype="tabular" />
+      <param name="peak_type" value="regions" />
+      <param name="edge_cutoff" value="130000" />
+      <!-- 
+      **NB** outputs have to be specified in order that they appear in the
+      tool (which is the order they will be written to the history) - the
+      test framework seems to use the order and ignores the "name" attribute
+      -->
+      <output name="xls_output" file="peaks.xls" compare="sim_size" />
+      <output name="transcripts_to_edges_out" file="transcripts_to_edges.out" ftype="tabular" />
+      <output name="transcripts_to_edges_summary" file="transcripts_to_edges.summary" ftype="tabular" />
+      <output name="tss_to_edges_out" file="tss_to_edges.out" ftype="tabular" />
+      <output name="tss_to_edges_summary" file="tss_to_edges.summary" ftype="tabular" />
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**What it does**
+
+Run RnaChipIntegrator to perform integrated analyses of gene expression
+and ChIP data, identifying the nearest ChIP peaks to each transcript
+and vice versa.
+
+For ChIP peaks defined as regions the following analyses are performed:
+
+ * **TranscriptsToPeakEdges**: reports the nearest transcripts with the smallest
+   distance from either their TSS or TES to the nearest peak edge.
+
+ * **TSSToPeakEdges**: reports the nearest transcripts with the smallest distance
+   from their TSS to the nearest peak edge.
+
+For ChIP peaks defined as summits:
+
+ * **TSSToSummits**: reports the nearest transcripts with the smallest distance
+   from the TSS to the nearest peak summit.
+
+ * **PeaksToTranscripts**: reports the nearest peak summits with the smallest
+   distance to either the TSS or TES of each transcript.
+
+The program was originally written specifically for ChIP-Seq and RNA-Seq data
+but works equally well for ChIP-chip and microarray expression data, and can
+also be used to integrate any set of genomic features (e.g. canonical genes,
+CpG islands) with expression data.
+
+RnaChipIntegrator can be obtained from
+http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
+
+-------------
+
+.. class:: infomark
+
+**Input**
+
+The expression data must be in a tab-delimited file with the following columns
+of data for each genomic feature (one feature per line):
+
+====== ========== ======================================================================
+Column Name       Description
+====== ========== ======================================================================
+     1 ID         Name used to identify the feature in the output
+     2 chr        Chromosome name
+     3 start      Start position of the feature
+     4 end        End position of the feature
+     5 strand     Must be either '+' or '-'
+     6 diff_expr  Optional: indicates feature is differentially expressed (1) or not (0)
+====== ========== ======================================================================
+
+The ChIP-seq data must be in a tab-delimited file with 3 columns of data for each
+ChIP peak (one per line):
+
+====== ========== ======================================================================
+Column Name       Description
+====== ========== ======================================================================
+     1 chr        Chromosome name (must match one of those in expression data file)
+     2 start      Start position of the peak 
+     3 end        End position of the peak (start + 1 for summit data)
+====== ========== ======================================================================
+
+The ChIP peak data can be either the summit (in which case 'end' - 'start' = 1) or the
+entire extent of the binding region (with 'start' and 'end' indicating the limits).
+
+-------------
+
+.. class:: infomark
+
+**Output**
+
+The outputs from this tool vary depending on the type of data that is input, however
+generally there is one tab-delimited results file for each analysis described above
+in the **What it does** section (some analyses output a second file with just the
+"best" hits).
+
+A history item will be generated for each output file, unless the option to put them
+into a single zip archive is selected; this archive file will have to be downloaded
+and unzipped on your local machine. It is recommended that you refer to the
+RnaChipIntegrator documentation for information on the contents of each output file:
+https://github.com/fls-bioinformatics-core/RnaChipIntegrator/blob/master/doc/MANUAL.markdown
+
+In addition an Excel spreadsheet (with one page for each analysis performed) is always
+produced.
+
+-------------
+
+.. class:: infomark
+
+**Credits**
+
+This Galaxy tool has been developed within the Bioinformatics Core Facility at the
+University of Manchester. It runs the RnaChipIntegrator package which has also been
+developed by this group, and is documented at
+http://fls-bioinformatics-core.github.com/RnaChipIntegrator/
+
+Please kindly acknowledge the Bioinformatics Core Facility if you use this tool.
+  </help>
+</tool>
author	pjbriggs
date	Tue, 30 Jun 2015 06:44:06 -0400
parents
children	5f69a2c1b9c9