comparison peakzilla.xml @ 0:ca3ec50bfd94 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/peakzilla commit defa1857cbd66d5f90e3e6f98dc11e1d215b742a
author iuc
date Mon, 26 Feb 2024 10:55:19 +0000
parents
children 8badcbe5792c
comparison
equal deleted inserted replaced
-1:000000000000 0:ca3ec50bfd94
<tool id="peakzilla" name="Peakzilla" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>Identify transcription factor binding sites from ChIP-seq and ChIP-exo experiments</description>
    <!-- @TOOL_VERSION@ is reused for the peakzilla requirement below; @VERSION_SUFFIX@ is the wrapper revision. -->
    <macros>
        <token name="@TOOL_VERSION@">1.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="2.7">python</requirement>
        <requirement type="package" version="@TOOL_VERSION@">peakzilla</requirement>
    </requirements>
    <!-- Any non-zero exit code is fatal. The NaN ValueError is matched separately so that it is
         reported with a description that points at the likely cause. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Generic error"/>
        <regex match="ValueError: cannot convert float NaN to integer"
               level="fatal"
               description="No peaks detected or input data error"/>
    </stdio>
    <command><![CDATA[
        ## Optional flags are emitted only when the corresponding parameter is set or checked.
        ## The peak table is read from stdout; log.txt is collected from the working directory.
        peakzilla.py
        #if $options.model_peaks
            -m '$options.model_peaks'
        #end if
        #if $options.enrichment_cutoff
            -c '$options.enrichment_cutoff'
        #end if
        #if $options.score_cutoff
            -s '$options.score_cutoff'
        #end if
        #if $options.fragment_size
            -f '$options.fragment_size'
        #end if
        #if $options.gaussian
            -e
        #end if
        #if $options.bedpe
            -p
        #end if
        #if $outputs.negative
            -n
        #end if
        #if $outputs.log
            -l log.txt
        #end if
        '$chip_bed' '$input_bed' > '$results'
    ]]></command>
    <inputs>
        <param name="chip_bed" type="data" format="bed" label="ChIP Dataset in BED format"/>
        <param name="input_bed" type="data" format="bed" label="Input Dataset in BED format"/>
        <section name="options" title="Optional Parameters" expanded="false">
            <param name="model_peaks" type="integer" optional="true" min="1" label="Number of most highly enriched regions used to estimate peak size (Default: 200)"/>
            <!-- NOTE(review): both cutoffs are restricted to integers here. Confirm against peakzilla's
                 option parser whether fractional values are accepted before widening to type="float". -->
            <param name="enrichment_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for fold enrichment (Default: 2)"/>
            <param name="score_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for peak score (Default: 1)"/>
            <param name="fragment_size" type="integer" optional="true" min="1" label="Manually set fragment size in bp" help="If not set, it will be estimated from data"/>
            <!-- Checking this box passes -e to peakzilla. -->
            <param name="gaussian" type="boolean" checked="false" label="Use empirical model estimate instead of gaussian"/>
            <param name="bedpe" type="boolean" checked="false" label="Input is paired end and in BEDPE format"/>
        </section>
        <section name="outputs" title="Output Options" expanded="false">
            <param name="negative" type="boolean" checked="false" label="Output negative peaks in control sample"/>
            <param name="log" type="boolean" checked="false" label="Output log file"/>
        </section>
    </inputs>
    <outputs>
        <data name="results" format="tabular" label="${tool.name} on ${on_string}"/>
        <!-- The toggles are declared inside the "outputs" section, so each filter has to address
             them through that section; a bare parameter name does not resolve to the user's choice. -->
        <data name="log" format="txt" from_work_dir="log.txt" label="Log file for ${tool.name} on ${on_string}">
            <filter>outputs['log']</filter>
        </data>
        <data name="negative_peaks" format="tabular" from_work_dir="negative_peaks.tsv" label="Negative peaks for ${tool.name} on ${on_string}">
            <filter>outputs['negative']</filter>
        </data>
    </outputs>
    <tests>
        <!-- All options set, both optional outputs requested: expects the peak table, the log and the negative peaks. -->
        <test expect_num_outputs="3">
            <param name="chip_bed" value="chip.bed"/>
            <param name="input_bed" value="input.bed"/>
            <section name="options">
                <param name="model_peaks" value="200"/>
                <param name="fragment_size" value="50"/>
                <param name="enrichment_cutoff" value="2"/>
                <param name="score_cutoff" value="1"/>
            </section>
            <section name="outputs">
                <param name="log" value="true"/>
                <param name="negative" value="true"/>
            </section>
            <output name="results" file="results.tsv">
                <assert_contents>
                    <has_text text="Peak_1"/>
                </assert_contents>
            </output>
            <output name="log" file="log.txt" lines_diff="30">
                <assert_contents>
                    <has_text text="9569"/>
                </assert_contents>
            </output>
            <output name="negative_peaks" file="negative_peaks.tsv">
                <assert_contents>
                    <has_text text="Chromosome"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Peakzilla**

Peakzilla identifies sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution.
It is designed to perform equally well for data from any species. All necessary parameters are estimated from the data. Peakzilla is suitable for both single and
paired-end data from any sequencing platform.

Note that peakzilla is not suited for the identification of broad regions of enrichment (e.g. ChIP-seq for histone marks), we recommend using MACS instead: Zhang et al.
Model-based Analysis of ChIP-Seq (MACS). Genome Biol (2008) 9(9):R137

*INPUT FORMAT*
Peakzilla accepts BED formatted alignments as input.

For conversion to BED format and working with BED files and alignments in
general I highly recommend:

* bowtie (http://bowtie-bio.sourceforge.net/)
* SAMtools (http://samtools.sourceforge.net/)
* bedtools (http://code.google.com/p/bedtools/)


*WORKFLOW EXAMPLE*
# use bowtie to map uniquely mappable reads to the genome
bowtie -p4 -m1 --sam genome_index input.fastq input.sam
bowtie -p4 -m1 --sam genome_index chip.fastq chip.sam

# convert to BAM format
samtools view -bS input.sam > input.bam
samtools view -bS chip.sam > chip.bam

# convert to BED format
bamToBed -i input.bam > input.bed
bamToBed -i chip.bam > chip.bed

# run peakzilla
python peakzilla.py chip.bed input.bed > chip_peaks.tsv

# Comparison of 2 datasets
# Determine significant peaks with a score threshold of 10
python peakzilla.py -s 10 chip1.bed input1.bed > chip1_s10_peaks.tsv
# Determine enriched regions with a score threshold of 2
python peakzilla.py -s 2 chip2.bed input2.bed > chip2_s2_peaks.tsv
# Overlap significant peaks from chip1 with enriched regions from chip2
intersectBed -a chip1_s10_peaks.tsv -b chip2_s2_peaks.tsv > intersect_peaks.tsv

For example datasets as well as an example of a computational pipeline for the comparative analysis of ChIP-seq datasets, please refer to our
publication: Bardet AF et al. A computational pipeline for comparative ChIP-seq analyses. Nature Protocols (2011) 7(1):45-61 (http://www.starklab.org/data/bardet_natprotoc_2011/)

*OPTIONS*
One of peakzilla's design goals is to learn all the necessary information
from the data. The usage of the options should therefore not be required.

*OUTPUT FORMAT*
* Results are printed as a table of tab-delimited values to stdout
* Logs are appended to log.txt in the current directory or a custom directory/filename specified by the -l option
* Enriched regions in the control sample are written to negative_peaks.tsv or a custom directory/filename specified by the -n option
* Columns represent Chromosome / Start / End / Name / Summit / Score / ChIP / Control / FoldEnrichment / DistributionScore / FDR (%)
    ]]></help>
    <citations>
        <citation type="doi">10.1038/nprot.2011.420</citation>
    </citations>
</tool>