comparison footprint.xml @ 0:4bff424dfa47 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/footprint commit 3a110c632920d1ed94b78053184978040df3edaf
author rnateam
date Tue, 02 May 2017 15:05:59 -0400
parents
children 0d94a529f925
comparison
equal deleted inserted replaced
-1:000000000000 0:4bff424dfa47
1 <?xml version="1.0" encoding="UTF-8"?>
2 <tool id="footprint" name="footprint" version="1.0.0">
3 <requirements> <!-- declares the footprint package, version 1.0.0, as this tool's only requirement -->
4 <requirement version="1.0.0" type="package">footprint</requirement>
5 </requirements>
6 <stdio> <!-- exit-code rule: the range below covers exit codes from 1 upward -->
7 <exit_code range="1:"/>
8 </stdio>
9 <command><![CDATA[
10 ln -s '$bam_file' ./bam_file.bam
11 ## the BAM input is symlinked to a fixed name in the working directory
12 &&
13 ## find_footprints.sh receives its arguments positionally, one per line below
14 find_footprints.sh
15
16 ./bam_file.bam
17
18 '$chrom_sizes'
19
20 '$motif_coords'
21
22 ## genome source: a FASTA from the history, or the path field of the selected built-in entry
23 #if $refGenomeSource.genomeSource == "history":
24 '$refGenomeSource.ownFile'
25 #else
26 '$refGenomeSource.builtin.fields.path'
27 #end if
28 ## free-text value: quoted so an empty or multi-word name stays exactly one argument
29 '$factor_name'
30
31 '$bias_file'
32
33 '$peak_file'
34 ## the three values below come from select parameters whose options are fixed in the inputs section
35 $no_of_components
36
37 $background
38
39 $fixed_bg
40 ## rename the files matched by each glob to the fixed names used as from_work_dir in the outputs section
41 &&
42 mv *.PARAM PARAM
43 &&
44 mv *.RESULTS RESULTS
45 &&
46 mv *.plot2.png plot2.png
47 &&
48 mv *.plot1.png plot1.png
49 ]]>
50 </command>
51 <inputs> <!-- parameters are referenced by name from the command section -->
52 <param name="bam_file" type="data" format="bam" label="alignment bam file" help="" />
53 <param name="chrom_sizes" type="data" format="tabular" label="chromosome length" help="" />
54 <param name="motif_coords" type="data" format="bed" label="coordinates of motif" help="" />
55 <conditional name="refGenomeSource"> <!-- reference genome: a built-in entry or a FASTA dataset from the history -->
56 <param name="genomeSource" type="select"
57 label="Will you select a reference genome from your
58 history or use a built-in genome?"
59 help="The version of genome against which the reads were aligned.">
60 <option value="fai" selected="true">
61 Use a built-in genome</option>
62 <option value="history">
63 Use a genome from my current history</option>
64 </param>
65 <when value="fai">
66 <param name="builtin" type="select"
67 label="Select a reference genome">
68 <options from_data_table="sam_fa_indices"> <!-- choices come from the sam_fa_indices data table, sorted on column 1 -->
69 <filter type="sort_by" column="1" />
70 <validator type="no_options"
71 message="A built-in reference genome is not available
72 for the build associated with the selected input file"/>
73 </options>
74 </param>
75 </when>
76 <when value="history">
77 <param name="ownFile" type="data" format="fasta"
78 label="Select the reference genome" help="Genome sequences in FASTA format" />
79 </when>
80 </conditional>
81 <param name="factor_name" type="text" label="transcription factor" help="e.g. CTCF" />
82 <param name="bias_file" type="data" format="tabular,txt" label="cleavage/transposition bias" help="" />
83 <param name="peak_file" type="data" format="tabular" label="coordinates of ChIP-seq peaks" help="" />
84 <param name="no_of_components" type="select" label="number of components">
85 <option value="2" selected="true">2</option>
86 <option value="3">3</option>
87 </param>
88 <param name="background" type="select" label="background components">
89 <option value="Seq" selected="true">Seq</option>
90 <option value="Flat">Flat</option>
91 </param>
92 <param name="fixed_bg" type="select" label="fixed background component">
93 <option value="TRUE" selected="true">TRUE</option>
94 <option value="FALSE">FALSE</option>
95 </param>
96 </inputs>
97 <outputs> <!-- each dataset is taken from the working-directory file named in its from_work_dir attribute -->
98 <data name="RESULTS" from_work_dir="RESULTS" format="tabular" label="${tool.name} on ${on_string}: results" />
99 <data name="PARAM" from_work_dir="PARAM" format="txt" label="${tool.name} on ${on_string}: parameters" />
100 <data name="plot1" from_work_dir="plot1.png" format="png" label="${tool.name} on ${on_string}: plot 1" />
101 <data name="plot2" from_work_dir="plot2.png" format="png" label="${tool.name} on ${on_string}: plot 2" />
102 </outputs>
103 <tests> <!-- one test: history genome, 2 components, Seq background, fixed background TRUE -->
104 <test>
105 <param name="bam_file" value="input_ATAC_HEK293_hg19_chr1.bam"/>
106 <param name="chrom_sizes" value="input_hg19.chr1.chrom.size"/>
107 <param name="motif_coords" value="input_CTCF_motifs_hg19_chr1.bed"/>
108 <param name="genomeSource" value="history"/>
109 <param name="ownFile" value="input_hg19_chr1.fa"/>
110 <param name="factor_name" value="CTCF"/>
111 <param name="bias_file" value="input_SeqBias_ATAC.txt"/>
112 <param name="peak_file" value="input_CTCF_HEK293_chip_hg19_chr1.bed"/>
113 <param name="no_of_components" value="2"/>
114 <param name="background" value="Seq"/>
115 <param name="fixed_bg" value="TRUE"/>
116 <output name="RESULTS" ftype="tabular" file="output.RESULTS" compare="sim_size"/>
117 <output name="PARAM" ftype="txt" file="output.PARAM" compare="sim_size"/>
118 <output name="plot1" ftype="png" file="output_plot1.png" compare="sim_size" delta="15000"/>
119 <output name="plot2" ftype="png" file="output_plot2.png" compare="sim_size" delta="15000"/>
120 </test>
121 </tests>
122 <help><![CDATA[.. class:: infomark
123
124 **Purpose**
125
126 This is a pipeline to find transcription factor footprints in ATAC-seq or DNase-seq data.
127
128
129 -----
130
131 .. class:: infomark
132
133 **Inputs**
134
135 alignment bam file
136 * A bam file from the ATAC-seq or DNase-seq experiment.
137
138 chromosome length
139 * A tab delimited file with 2 columns.
140 * The first column is the chromosome name and the second column is the chromosome length for the appropriate organism and genome build.
141 * Example: chr1 10000000
142
143 coordinates of motif
144 * A 6-column bed file with the coordinates of motif matches (e.g. resulting from scanning the genome with a PWM) for the transcription factor of interest.
145 * The 6 columns should contain chromosome, start coordinate, end coordinate, name, score and strand information in this order. The coordinates should be closed (1-based).
146 * Example: chr1 24782 24800 . 11.60 -
147
148 transcription factor
149 * The name of the transcription factor of interest supplied by the user, e.g. CTCF.
150
151 cleavage/transposition bias
152 * The cleavage/transposition bias of the different protocols, for all 6-mers.
153 * Provided `options`_: ATAC, DNase double hit or DNase single hit protocols.
154
155 .. _options: https://ohlerlab.mdc-berlin.de/software/Reproducible_footprinting_139/
156
157 coordinates of ChIP-seq peaks
158 * A file with the coordinates of the ChIP-seq peaks for the transcription factor of interest.
159 * The format is flexible as long as the first 3 columns (chromosome, start coordinate, end coordinate) are present.
160 * Example: chr1 237622 237882
161
162 number of components
163 * Total number of footprint and background components that should be learned from the data.
164 * Options are 2 components (1 footprint and 1 background) or 3 components (2 footprint and 1 background).
165
166 background components
167 * The mode of initialization for the background component. Options are "Flat" or "Seq".
168 * Choosing "Flat" initializes this component as a uniform distribution.
169 * Choosing "Seq" initializes it as the signal profile that would be expected solely due to the protocol bias (given by the cleavage/transposition bias file).
170
171 fixed background component
172 * Whether the background component should be kept fixed.
173 * Options are TRUE or FALSE.
174 * Setting "TRUE" keeps this component fixed, whereas setting "FALSE" lets it be reestimated during training.
175
176 -----
177
178 .. class:: infomark
179
180 **Outputs**
181
182 results
183 * The results of the footprinting analysis.
184 * The first 6 columns harbor the motif information (identical to the 'coordinates of motif').
185 * The 7th column has the footprint score (log-odds of footprint versus background) for each motif instance.
186 * The following columns show the probabilities for the individual footprint and background components.
187
188 parameters
189 * Gives the trained parameters for the footprint and background components.
190 * It includes as many lines as components (e.g. the first line has the parameters for the first component).
191
192 plot 1
193 * A plot with two panels, showing the initial components above and the final trained components below.
194 * The plotted values for the final components are given in the 'parameters' output file explained above.
195
196 plot 2
197 * A plot with only the final trained components.
198 * In a model where 2 components are used, this plot is identical to the bottom panel in plot1.
199 * When 3 components are used, this plot shows the weighted average of the 2 footprint components as the final footprint profile.
200
201 ]]></help>
202 <citations> <!-- NOTE(review): the BibTeX entry below looks like a placeholder (title "To submit"; journal, year, volume and pages are empty) - TODO replace once publication details exist -->
203 <citation type="bibtex">@ARTICLE{footprint,
204 author = {Aslihan Karabacak and Uwe Ohler},
205 title = {To submit},
206 journal = {},
207 year = {},
208 volume = {},
209 pages = {}
210 }</citation>
211 </citations>
212 </tool>