diff peakzilla.xml @ 0:ca3ec50bfd94 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/peakzilla commit defa1857cbd66d5f90e3e6f98dc11e1d215b742a
author iuc
date Mon, 26 Feb 2024 10:55:19 +0000
parents
children 8badcbe5792c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peakzilla.xml	Mon Feb 26 10:55:19 2024 +0000
@@ -0,0 +1,160 @@
+<tool id="peakzilla" name="Peakzilla" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>Identify transcription factor binding sites from ChIP-seq and ChIP-exo experiments</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.0</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.7">python</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">peakzilla</requirement>
+    </requirements>
+    <stdio>
+      <exit_code range="1:" level="fatal" description="Generic error"/>
+      <regex match="ValueError: cannot convert float NaN to integer"
+             level="fatal"
+             description="No peaks detected or input data error"/>
+    </stdio>
+    <command>
+        <![CDATA[
+            peakzilla.py
+            #if $options.model_peaks
+                -m '$options.model_peaks'
+            #end if
+            #if $options.enrichment_cutoff
+                -c '$options.enrichment_cutoff'
+            #end if
+            #if $options.score_cutoff
+                -s '$options.score_cutoff'
+            #end if
+            #if $options.fragment_size
+                -f '$options.fragment_size'
+            #end if
+            #if $options.gaussian
+                -e
+            #end if
+            #if $options.bedpe
+                -p
+            #end if
+            #if $outputs.negative
+                -n
+            #end if
+            #if $outputs.log
+                -l log.txt
+            #end if
+            '$chip_bed' '$input_bed' > '$results'
+        ]]>
+    </command>
+    <inputs>
+        <param name="chip_bed" type="data" format="bed" label="ChIP Dataset in BED format"/>
+        <param name="input_bed" type="data" format="bed" label="Input Dataset in BED format"/>
+        <section name="options" title="Optional Parameters" expanded="false">
+            <param name="model_peaks" type="integer" optional="true" min="1" label="Number of most highly enriched regions used to estimate peak size (Default: 200)"/>
+            <param name="enrichment_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for fold enrichment (Default: 2)"/>
+            <param name="score_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for peak score (Default: 1)"/>
+            <param name="fragment_size" type="integer" optional="true" min="1" label="Manually set fragment size in bp" help="If not set, it will be estimated from data"/>
+            <param name="gaussian" type="boolean" checked="false" label="Use empirical model estimate instead of gaussian"/>
+            <param name="bedpe" type="boolean" checked="false" label="Input is paired end and in BEDPE format"/>
+        </section>
+        <section name="outputs" title="Output Options" expanded="false">
+            <param name="negative" type="boolean" checked="false" label="Output negative peaks in control sample"/>
+            <param name="log" type="boolean" checked="false" label="Output log file"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="results" format="tabular" label="${tool.name} on ${on_string}"/>
+        <data name="log" format="txt" from_work_dir="log.txt" label="Log file for ${tool.name} on ${on_string}">
+            <filter>log</filter>
+        </data>
+        <data name="negative_peaks" format="tabular" from_work_dir="negative_peaks.tsv" label="Negative peaks for ${tool.name} on ${on_string}">
+            <filter>negative</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="chip_bed" value="chip.bed" />
+            <param name="input_bed" value="input.bed" />
+            <param name="model_peaks" value="200" />
+            <param name="fragment_size" value="50" />
+            <param name="enrichment_cutoff" value="2" />
+            <param name="score_cutoff" value="1" />
+            <param name="log" value="true" />
+            <param name="negative" value="true" />
+            <output name="results" file="results.tsv">
+                <assert_contents>
+                    <has_text text="Peak_1" />
+                </assert_contents>
+            </output>
+            <output name="log" file="log.txt" lines_diff="30">
+                <assert_contents>
+                    <has_text text="9569"/>
+                </assert_contents>
+            </output>
+            <output name="negative_peaks" file="negative_peaks.tsv">
+                <assert_contents>
+                     <has_text text="Chromosome" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        **Peakzilla**
+
+        Peakzilla identifies sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution.
+        It is designed to perform equally well for data from any species. All necessary parameters are estimated from the data. Peakzilla is suitable for both single and
+        paired-end data from any sequencing platform.
+
+        Note that peakzilla is not suited for the identification of broad regions of enrichment (e.g. ChIP-seq for histone marks), we recommend using MACS instead: Zhang et al.
+        Model-based Analysis of ChIP-Seq (MACS). Genome Biol (2008) 9(9):R137
+
+        *INPUT FORMAT*
+        Peakzilla accepts BED formatted alignments as input.
+
+        For conversation to BED format and working with BED files and alignments in
+        general I highly recommend:
+
+        * bowtie (http://bowtie-bio.sourceforge.net/)
+        * SAMtools (http://samtools.sourceforge.net/)
+        * bedtools (http://code.google.com/p/bedtools/)
+
+
+        *WORKFLOW EXAMPLE*
+        # use bowtie to map uniquely mappable reads to the genome
+        bowtie -p4 -m1 --sam genome_index input.fastq input.sam
+        bowtie -p4 -m1 --sam genome_index chip.fastq chip.sam
+
+        # convert to BAM format
+        samtools view -bS input.sam > input.bam
+        samtools view -bS chip.sam > chip.bam
+
+        # convert to BED format
+        bamToBed -i input.bam > input.bed
+        bamToBed -i chip.bam > chip.bed
+
+        # run peakzilla
+        python peakzilla.py chip.bed input.bed > chip_peaks.tsv
+
+        # Comparison of 2 datasets
+        #    Determine significant peaks with a score threshold of 10
+        python peakzilla.py -s 10 chip1.bed input1.bed > chip1_s10_peaks.tsv
+        #    Determine enriched regions with a score threshold of 2
+        python peakzilla.py -s 2 chip2.bed input2.bed > chip2_s2_peaks.tsv
+        #    Overlap significant peaks from chip1 with enriched regions from chip2
+        intersectBed -a chip1_s10_peaks.tsv -b chip2_s2_peaks.tsv > intersect_peaks.tsv
+
+        For example datasets as well as an example of a computational pipeline for the comparative analysis of ChIP-seq datasets, please refer to our
+        publication: Bardet AF et al. A computational pipeline for comparative ChIP-seq analyses. Nature Protocols (2011) 7(1):45-61 (http://www.starklab.org/data/bardet_natprotoc_2011/)
+
+        *OPTIONS*
+        One of peakzilla's design goals is to learn all the necessary information
+        from the data. The usage of the options should therefore not be required.
+
+        *OUTPUT FORMAT*
+        * Results are printed as a table of tab-delimited values to stdout
+        * Logs are appended to logs.txt in the current directory or a custom directory/filename specified by the -l option
+        * Enriched regions in the control sample are written to negative_peaks.tsv or a custom directory/filename specified by the -n option
+        * Columns represent Chromosome / Start / End / Name / Summit / Score / ChIP / Control / FoldEnrichment / DistributionScore / FDR (%)
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/nprot.2011.420</citation>
+    </citations>
+</tool>