Mercurial > repos > iuc > peakzilla
comparison peakzilla.xml @ 0:ca3ec50bfd94 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/peakzilla commit defa1857cbd66d5f90e3e6f98dc11e1d215b742a
author | iuc |
---|---|
date | Mon, 26 Feb 2024 10:55:19 +0000 |
parents | |
children | 8badcbe5792c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ca3ec50bfd94 |
---|---|
1 <tool id="peakzilla" name="Peakzilla" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
2 <description>Identify transcription factor binding sites from ChIP-seq and ChIP-exo experiments</description> | |
3 <macros> | |
4 <token name="@TOOL_VERSION@">1.0</token> | |
5 <token name="@VERSION_SUFFIX@">0</token> | |
6 </macros> | |
7 <requirements> | |
8 <requirement type="package" version="2.7">python</requirement> | |
9 <requirement type="package" version="@TOOL_VERSION@">peakzilla</requirement> | |
10 </requirements> | |
11 <stdio> <!-- Error detection: an exit code of 1 or higher is fatal, and so is tool output matching the NaN-to-integer ValueError, which is reported to the user as "No peaks detected or input data error". --> | |
12 <exit_code range="1:" level="fatal" description="Generic error"/> | |
13 <regex match="ValueError: cannot convert float NaN to integer" | |
14 level="fatal" | |
15 description="No peaks detected or input data error"/> | |
16 </stdio> | |
17 <!-- Cheetah template for the peakzilla.py command line: each option flag is emitted only when its parameter is set or checked, the log (when requested) goes to log.txt in the working directory, the ChIP and input BED files are positional, and stdout is redirected to the results dataset. --> <command> | |
18 <![CDATA[ | |
19 peakzilla.py | |
20 #if $options.model_peaks | |
21 -m '$options.model_peaks' | |
22 #end if | |
23 #if $options.enrichment_cutoff | |
24 -c '$options.enrichment_cutoff' | |
25 #end if | |
26 #if $options.score_cutoff | |
27 -s '$options.score_cutoff' | |
28 #end if | |
29 #if $options.fragment_size | |
30 -f '$options.fragment_size' | |
31 #end if | |
32 #if $options.gaussian | |
33 -e | |
34 #end if | |
35 #if $options.bedpe | |
36 -p | |
37 #end if | |
38 #if $outputs.negative | |
39 -n | |
40 #end if | |
41 #if $outputs.log | |
42 -l log.txt | |
43 #end if | |
44 '$chip_bed' '$input_bed' > '$results' | |
45 ]]> | |
46 </command> | |
47 <inputs> <!-- Two BED datasets (ChIP and input/control) plus two collapsed sections: "options" holds four optional integers (minimum 1) and two boolean flags; "outputs" holds the booleans that request the negative-peaks file and the log file. --> | |
48 <param name="chip_bed" type="data" format="bed" label="ChIP Dataset in BED format"/> | |
49 <param name="input_bed" type="data" format="bed" label="Input Dataset in BED format"/> | |
50 <section name="options" title="Optional Parameters" expanded="false"> | |
51 <param name="model_peaks" type="integer" optional="true" min="1" label="Number of most highly enriched regions used to estimate peak size (Default: 200)"/> | |
52 <param name="enrichment_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for fold enrichment (Default: 2)"/> | |
53 <param name="score_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for peak score (Default: 1)"/> | |
54 <param name="fragment_size" type="integer" optional="true" min="1" label="Manually set fragment size in bp" help="If not set, it will be estimated from data"/> | |
55 <param name="gaussian" type="boolean" checked="false" label="Use empirical model estimate instead of gaussian"/> | |
56 <param name="bedpe" type="boolean" checked="false" label="Input is paired end and in BEDPE format"/> | |
57 </section> | |
58 <section name="outputs" title="Output Options" expanded="false"> | |
59 <param name="negative" type="boolean" checked="false" label="Output negative peaks in control sample"/> | |
60 <param name="log" type="boolean" checked="false" label="Output log file"/> | |
61 </section> | |
62 </inputs> | |
63 <outputs> <!-- "results" is always produced; the log and negative-peaks datasets are created only when the matching boolean in the "outputs" input section is checked. Section parameters are nested, so filters must address them as outputs['name'] rather than by bare name. --> | |
64 <data name="results" format="tabular" label="${tool.name} on ${on_string}"/> | |
65 <data name="log" format="txt" from_work_dir="log.txt" label="Log file for ${tool.name} on ${on_string}"> | |
66 <filter>outputs['log']</filter> | |
67 </data> | |
68 <data name="negative_peaks" format="tabular" from_work_dir="negative_peaks.tsv" label="Negative peaks for ${tool.name} on ${on_string}"> | |
69 <filter>outputs['negative']</filter> | |
70 </data> | |
71 </outputs> | |
72 <tests> <!-- Single test: sets the four integer options and enables both optional outputs, so three outputs are expected; each output is compared with a reference file and checked for a marker string (the log comparison tolerates up to 30 differing lines). --> | |
73 <test expect_num_outputs="3"> | |
74 <param name="chip_bed" value="chip.bed" /> | |
75 <param name="input_bed" value="input.bed" /> | |
76 <param name="model_peaks" value="200" /> | |
77 <param name="fragment_size" value="50" /> | |
78 <param name="enrichment_cutoff" value="2" /> | |
79 <param name="score_cutoff" value="1" /> | |
80 <param name="log" value="true" /> | |
81 <param name="negative" value="true" /> | |
82 <output name="results" file="results.tsv"> | |
83 <assert_contents> | |
84 <has_text text="Peak_1" /> | |
85 </assert_contents> | |
86 </output> | |
87 <output name="log" file="log.txt" lines_diff="30"> | |
88 <assert_contents> | |
89 <has_text text="9569"/> | |
90 </assert_contents> | |
91 </output> | |
92 <output name="negative_peaks" file="negative_peaks.tsv"> | |
93 <assert_contents> | |
94 <has_text text="Chromosome" /> | |
95 </assert_contents> | |
96 </output> | |
97 </test> | |
98 </tests> | |
99 <help><![CDATA[ | |
100 **Peakzilla** | |
101 | |
102 Peakzilla identifies sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution. | |
103 It is designed to perform equally well for data from any species. All necessary parameters are estimated from the data. Peakzilla is suitable for both single and | |
104 paired-end data from any sequencing platform. | |
105 | |
106 Note that peakzilla is not suited for the identification of broad regions of enrichment (e.g. ChIP-seq for histone marks); for those we recommend using MACS instead: Zhang et al. | |
107 Model-based Analysis of ChIP-Seq (MACS). Genome Biol (2008) 9(9):R137 | |
108 | |
109 *INPUT FORMAT* | |
110 Peakzilla accepts BED formatted alignments as input. | |
111 | |
112 For conversion to BED format and working with BED files and alignments in | |
113 general I highly recommend: | |
114 | |
115 * bowtie (http://bowtie-bio.sourceforge.net/) | |
116 * SAMtools (http://samtools.sourceforge.net/) | |
117 * bedtools (http://code.google.com/p/bedtools/) | |
118 | |
119 | |
120 *WORKFLOW EXAMPLE* | |
121 # use bowtie to map uniquely mappable reads to the genome | |
122 bowtie -p4 -m1 --sam genome_index input.fastq input.sam | |
123 bowtie -p4 -m1 --sam genome_index chip.fastq chip.sam | |
124 | |
125 # convert to BAM format | |
126 samtools view -bS input.sam > input.bam | |
127 samtools view -bS chip.sam > chip.bam | |
128 | |
129 # convert to BED format | |
130 bamToBed -i input.bam > input.bed | |
131 bamToBed -i chip.bam > chip.bed | |
132 | |
133 # run peakzilla | |
134 python peakzilla.py chip.bed input.bed > chip_peaks.tsv | |
135 | |
136 # Comparison of 2 datasets | |
137 # Determine significant peaks with a score threshold of 10 | |
138 python peakzilla.py -s 10 chip1.bed input1.bed > chip1_s10_peaks.tsv | |
139 # Determine enriched regions with a score threshold of 2 | |
140 python peakzilla.py -s 2 chip2.bed input2.bed > chip2_s2_peaks.tsv | |
141 # Overlap significant peaks from chip1 with enriched regions from chip2 | |
142 intersectBed -a chip1_s10_peaks.tsv -b chip2_s2_peaks.tsv > intersect_peaks.tsv | |
143 | |
144 For example datasets as well as an example of a computational pipeline for the comparative analysis of ChIP-seq datasets, please refer to our | |
145 publication: Bardet AF et al. A computational pipeline for comparative ChIP-seq analyses. Nature Protocols (2011) 7(1):45-61 (http://www.starklab.org/data/bardet_natprotoc_2011/) | |
146 | |
147 *OPTIONS* | |
148 One of peakzilla's design goals is to learn all the necessary information | |
149 from the data. The usage of the options should therefore not be required. | |
150 | |
151 *OUTPUT FORMAT* | |
152 * Results are printed as a table of tab-delimited values to stdout | |
153 * Logs are appended to log.txt in the current directory or a custom directory/filename specified by the -l option | |
154 * Enriched regions in the control sample are written to negative_peaks.tsv or a custom directory/filename specified by the -n option | |
155 * Columns represent Chromosome / Start / End / Name / Summit / Score / ChIP / Control / FoldEnrichment / DistributionScore / FDR (%) | |
156 ]]></help> | |
157 <citations> | |
158 <citation type="doi">10.1038/nprot.2011.420</citation> | |
159 </citations> | |
160 </tool> |