Mercurial > repos > rnateam > footprint

diff footprint.xml @ 0:4bff424dfa47 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/footprint commit 3a110c632920d1ed94b78053184978040df3edaf
author: rnateam
date: Tue, 02 May 2017 15:05:59 -0400
children: 0d94a529f925
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/footprint.xml	Tue May 02 15:05:59 2017 -0400
@@ -0,0 +1,212 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="footprint" name="footprint" version="1.0.0">
+    <requirements>
+        <requirement type="package" version="1.0.0">footprint</requirement> <!-- presumably provides find_footprints.sh used by the command; confirm against the package -->
+    </requirements>
+    <stdio>
+        <exit_code range="1:" /> <!-- rule matching every exit status of 1 or higher -->
+    </stdio>
+    <command><![CDATA[
+            ln -s '$bam_file' ./bam_file.bam
+
+            &&
+
+            find_footprints.sh
+            ## arguments below are passed positionally, in this order
+            ./bam_file.bam
+
+            '$chrom_sizes'
+
+            '$motif_coords'
+
+            ## genome source
+            #if $refGenomeSource.genomeSource == "history":
+                '$refGenomeSource.ownFile'
+            #else
+                '$refGenomeSource.builtin.fields.path'
+            #end if
+            ## quoted so that a name containing spaces stays one argument
+            '$factor_name'
+
+            '$bias_file'
+
+            '$peak_file'
+
+            $no_of_components
+
+            $background
+
+            $fixed_bg
+            ## rename the outputs to the fixed names the outputs section collects via from_work_dir
+            &&
+            mv *.PARAM PARAM
+            &&
+            mv *.RESULTS RESULTS
+            &&
+            mv *.plot2.png plot2.png
+            &&
+            mv *.plot1.png plot1.png
+]]>
+    </command>
+    <inputs> <!-- each param name below is referenced as $name in the command template -->
+        <param name="bam_file" type="data" format="bam" label="alignment bam file" help="" />
+        <param name="chrom_sizes" type="data" format="tabular" label="chromosome length" help="" />
+        <param name="motif_coords" type="data" format="bed" label="coordinates of motif" help="" />
+        <conditional name="refGenomeSource"> <!-- "fai" takes the path from the sam_fa_indices data table; "history" takes a FASTA dataset -->
+            <param name="genomeSource" type="select"
+                label="Will you select a reference genome from your
+                history or use a built-in genome?"
+                help="The version of genome against which the reads were aligned.">
+                <option value="fai" selected="true">
+                    Use a built-in genome</option>
+                <option value="history">
+                    Use a genome from my current history</option>
+            </param>
+            <when value="fai">
+            <param name="builtin" type="select"
+                label="Select a reference genome">
+                <options from_data_table="sam_fa_indices">
+                    <filter type="sort_by" column="1" />
+                    <validator type="no_options"
+                    message="A built-in reference genome is not available
+                    for the build associated with the selected input file"/>
+                </options>
+            </param>
+            </when>
+            <when value="history">
+                <param name="ownFile" type="data" format="fasta"
+                label="Select the reference genome"  help="Genome sequences in FASTA format" />
+            </when>
+        </conditional>
+        <param name="factor_name" type="text" label="transcription factor" help="e.g. CTCF" />
+        <param name="bias_file" type="data"  format="tabular,txt" label="cleavage/transposition bias" help="" />
+        <param name="peak_file" type="data" format="tabular" label="coordinates of ChIP-seq peaks" help="" />
+        <param name="no_of_components" type="select" label="number of components">
+            <option value="2" selected="true">2</option>
+            <option value="3">3</option>
+        </param>
+        <param name="background" type="select" label="background components">
+            <option value="Seq" selected="true">Seq</option>
+            <option value="Flat">Flat</option>
+        </param>
+        <param name="fixed_bg" type="select" label="fixed background component">
+            <option value="TRUE" selected="true">TRUE</option>
+            <option value="FALSE">FALSE</option>
+        </param>
+    </inputs>
+    <outputs> <!-- each from_work_dir value is a fixed file name produced by the mv steps at the end of the command -->
+        <data name="RESULTS" format="tabular" from_work_dir="RESULTS" label="${tool.name} on ${on_string}: results" />
+        <data name="PARAM" format="txt" from_work_dir="PARAM" label="${tool.name} on ${on_string}: parameters" />
+        <data name="plot1" format="png" from_work_dir="plot1.png" label="${tool.name} on ${on_string}: plot 1" />
+        <data name="plot2" format="png" from_work_dir="plot2.png" label="${tool.name} on ${on_string}: plot 2" />
+    </outputs>
+    <tests>
+        <test> <!-- covers the history-genome branch only (genomeSource=history); every output is checked with compare="sim_size", the plots with delta="15000" -->
+            <param name="bam_file" value="input_ATAC_HEK293_hg19_chr1.bam" />
+            <param name="chrom_sizes" value="input_hg19.chr1.chrom.size" />
+            <param name="motif_coords" value="input_CTCF_motifs_hg19_chr1.bed" />
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" value="input_hg19_chr1.fa" />
+            <param name="factor_name" value="CTCF" />
+            <param name="bias_file" value="input_SeqBias_ATAC.txt" />
+            <param name="peak_file" value="input_CTCF_HEK293_chip_hg19_chr1.bed" />
+            <param name="no_of_components" value="2" />
+            <param name="background" value="Seq" />
+            <param name="fixed_bg" value="TRUE" />
+            <output name="RESULTS" file="output.RESULTS" ftype="tabular" compare="sim_size"/>
+            <output name="PARAM" file="output.PARAM" ftype="txt"  compare="sim_size"/>
+            <output name="plot1" file="output_plot1.png" ftype="png" compare="sim_size" delta="15000" />
+            <output name="plot2" file="output_plot2.png" ftype="png" compare="sim_size" delta="15000" />
+        </test>
+    </tests>
+    <help><![CDATA[.. class:: infomark
+
+**Purpose**
+
+This is a pipeline to find transcription factor footprints in ATAC-seq or DNase-seq data.
+
+
+-----
+
+.. class:: infomark
+
+**Inputs**
+
+alignment bam file
+ * A bam file from the ATAC-seq or DNase-seq experiment.
+
+chromosome length
+ * A tab delimited file with 2 columns.
+ * The first column is the chromosome name and the second column is the chromosome length for the appropriate organism and genome build.
+ * Example: chr1    10000000
+
+coordinates of motif
+ * A 6-column bed file with the coordinates of motif matches (e.g. resulting from scanning the genome with a PWM) for the transcription factor of interest.
+ * The 6 columns should contain chromosome, start coordinate, end coordinate, name, score and strand information in this order. The coordinates should be closed (1-based).
+ * Example: chr1    24782   24800   .       11.60   -
+
+transcription factor
+ * The name of the transcription factor of interest supplied by the user, e.g. CTCF.
+
+cleavage/transposition bias
+ * The cleavage/transposition bias of the different protocols, for all 6-mers.
+ * Provided `options`_: ATAC, DNase double hit or DNase single hit protocols.
+
+ .. _options: https://ohlerlab.mdc-berlin.de/software/Reproducible_footprinting_139/
+
+coordinates of ChIP-seq peaks
+ * A file with the coordinates of the ChIP-seq peaks for the transcription factor of interest.
+ * The format is flexible as long as the first 3 columns (chromosome, start coordinate, end coordinate) are present.
+ * Example: chr1    237622  237882
+
+number of components
+ * Total number of footprint and background components that should be learned from the data.
+ * Options are 2 (1 fp and 1 bg) and 3 (2 fp and 1 bg) components.
+
+background components
+ * The mode of initialization for the background component. Options are "Flat" or "Seq".
+ * Choosing "Flat" initializes this component as a uniform distribution.
+ * Choosing "Seq" initializes it as the signal profile that would be expected solely due to the protocol bias (given by the cleavage/transposition bias file).
+
+fixed background component
+ * Whether the background component should be kept fixed.
+ * Options are TRUE or FALSE.
+ * Setting "TRUE" keeps this component fixed, whereas setting "FALSE" lets it be reestimated during training.
+
+-----
+
+.. class:: infomark
+
+**Outputs**
+
+results
+ * The results of the footprinting analysis.
+ * The first 6 columns harbor the motif information (identical to the 'coordinates of motif').
+ * The 7th column has the footprint score (log-odds of footprint versus background) for each motif instance.
+ * The following columns show the probabilities for the individual footprint and background components.
+
+parameters
+ * Gives the trained parameters for the footprint and background components.
+ * It includes as many lines as components (e.g. the first line has the parameters for the first component).
+
+plot 1
+ * A plot with two panels, showing the initial components above and the final trained components below.
+ * The plotted values for the final components are given in the 'parameters' output file explained above.
+
+plot 2
+ * A plot showing only the final trained components.
+ * In a model where 2 components are used, this plot is identical to the bottom panel in plot 1.
+ * When 3 components are used, this plot shows the weighted average of the 2 footprint components as the final footprint profile.
+
+]]></help>
+    <citations> <!-- NOTE(review): the BibTeX entry below is a placeholder (title "To submit"; journal, year, volume and pages are empty) -->
+        <citation type="bibtex">@ARTICLE{footprint,
+        author = {Aslihan Karabacak and Uwe Ohler},
+        title = {To submit},
+        journal = {},
+        year = {},
+        volume = {},
+        pages = {}
+        }</citation>
+    </citations>
+</tool>
author	rnateam
date	Tue, 02 May 2017 15:05:59 -0400
parents
children	0d94a529f925