Mercurial > repos > iuc > peakzilla
diff peakzilla.xml @ 0:ca3ec50bfd94 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/peakzilla commit defa1857cbd66d5f90e3e6f98dc11e1d215b742a
author | iuc |
---|---|
date | Mon, 26 Feb 2024 10:55:19 +0000 |
parents | |
children | 8badcbe5792c |
line wrap: on
line diff
<!--
    Galaxy wrapper for Peakzilla, a peak caller for transcription factor
    ChIP-seq and ChIP-exo data. It runs peakzilla.py on a ChIP BED dataset and
    an input BED dataset and captures the peak table from stdout.

    Introduced as a new file in changeset 0:ca3ec50bfd94
    (Mon, 26 Feb 2024 10:55:19 +0000).
-->
<tool id="peakzilla" name="Peakzilla" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>Identify transcription factor binding sites from ChIP-seq and ChIP-exo experiments</description>
    <macros>
        <token name="@TOOL_VERSION@">1.0</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="2.7">python</requirement>
        <requirement type="package" version="@TOOL_VERSION@">peakzilla</requirement>
    </requirements>
    <!--
        Any exit code of 1 or above is fatal. The regex additionally turns the
        NaN conversion ValueError into a readable error description.
    -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Generic error"/>
        <regex match="ValueError: cannot convert float NaN to integer"
               level="fatal"
               description="No peaks detected or input data error"/>
    </stdio>
    <!--
        Optional flags are only emitted when the matching parameter is set.
        The log is written to log.txt in the working directory so that the
        "log" output can collect it via from_work_dir.
    -->
    <command>
        <![CDATA[
        peakzilla.py
        #if $options.model_peaks
            -m '$options.model_peaks'
        #end if
        #if $options.enrichment_cutoff
            -c '$options.enrichment_cutoff'
        #end if
        #if $options.score_cutoff
            -s '$options.score_cutoff'
        #end if
        #if $options.fragment_size
            -f '$options.fragment_size'
        #end if
        #if $options.gaussian
            -e
        #end if
        #if $options.bedpe
            -p
        #end if
        #if $outputs.negative
            -n
        #end if
        #if $outputs.log
            -l log.txt
        #end if
        '$chip_bed' '$input_bed' > '$results'
        ]]>
    </command>
    <inputs>
        <param name="chip_bed" type="data" format="bed" label="ChIP Dataset in BED format"/>
        <param name="input_bed" type="data" format="bed" label="Input Dataset in BED format"/>
        <section name="options" title="Optional Parameters" expanded="false">
            <param name="model_peaks" type="integer" optional="true" min="1" label="Number of most highly enriched regions used to estimate peak size (Default: 200)"/>
            <param name="enrichment_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for fold enrichment (Default: 2)"/>
            <param name="score_cutoff" type="integer" optional="true" min="1" label="Minimum cutoff for peak score (Default: 1)"/>
            <param name="fragment_size" type="integer" optional="true" min="1" label="Manually set fragment size in bp" help="If not set, it will be estimated from data"/>
            <param name="gaussian" type="boolean" checked="false" label="Use empirical model estimate instead of gaussian"/>
            <param name="bedpe" type="boolean" checked="false" label="Input is paired end and in BEDPE format"/>
        </section>
        <section name="outputs" title="Output Options" expanded="false">
            <param name="negative" type="boolean" checked="false" label="Output negative peaks in control sample"/>
            <param name="log" type="boolean" checked="false" label="Output log file"/>
        </section>
    </inputs>
    <!--
        The optional outputs are controlled by booleans that live inside the
        "outputs" section. Section parameters are nested in the filter
        context, so they are addressed through the section name; a bare
        parameter name does not resolve to the parameter value.
    -->
    <outputs>
        <data name="results" format="tabular" label="${tool.name} on ${on_string}"/>
        <data name="log" format="txt" from_work_dir="log.txt" label="Log file for ${tool.name} on ${on_string}">
            <filter>outputs['log']</filter>
        </data>
        <data name="negative_peaks" format="tabular" from_work_dir="negative_peaks.tsv" label="Negative peaks for ${tool.name} on ${on_string}">
            <filter>outputs['negative']</filter>
        </data>
    </outputs>
    <tests>
        <!-- All optional outputs enabled: results, log and negative peaks. -->
        <test expect_num_outputs="3">
            <param name="chip_bed" value="chip.bed" />
            <param name="input_bed" value="input.bed" />
            <param name="model_peaks" value="200" />
            <param name="fragment_size" value="50" />
            <param name="enrichment_cutoff" value="2" />
            <param name="score_cutoff" value="1" />
            <param name="log" value="true" />
            <param name="negative" value="true" />
            <output name="results" file="results.tsv">
                <assert_contents>
                    <has_text text="Peak_1" />
                </assert_contents>
            </output>
            <output name="log" file="log.txt" lines_diff="30">
                <assert_contents>
                    <has_text text="9569"/>
                </assert_contents>
            </output>
            <output name="negative_peaks" file="negative_peaks.tsv">
                <assert_contents>
                    <has_text text="Chromosome" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Peakzilla**

Peakzilla identifies sites of enrichment and transcription factor binding sites from transcription factor ChIP-seq and ChIP-exo experiments at high accuracy and resolution.
It is designed to perform equally well for data from any species. All necessary parameters are estimated from the data. Peakzilla is suitable for both single and
paired-end data from any sequencing platform.

Note that peakzilla is not suited for the identification of broad regions of enrichment (e.g. ChIP-seq for histone marks), we recommend using MACS instead: Zhang et al.
Model-based Analysis of ChIP-Seq (MACS). Genome Biol (2008) 9(9):R137

*INPUT FORMAT*

Peakzilla accepts BED formatted alignments as input.

For conversion to BED format and working with BED files and alignments in
general I highly recommend:

* bowtie (http://bowtie-bio.sourceforge.net/)
* SAMtools (http://samtools.sourceforge.net/)
* bedtools (http://code.google.com/p/bedtools/)


*WORKFLOW EXAMPLE*

::

    # use bowtie to map uniquely mappable reads to the genome
    bowtie -p4 -m1 --sam genome_index input.fastq input.sam
    bowtie -p4 -m1 --sam genome_index chip.fastq chip.sam

    # convert to BAM format
    samtools view -bS input.sam > input.bam
    samtools view -bS chip.sam > chip.bam

    # convert to BED format
    bamToBed -i input.bam > input.bed
    bamToBed -i chip.bam > chip.bed

    # run peakzilla
    python peakzilla.py chip.bed input.bed > chip_peaks.tsv

    # Comparison of 2 datasets
    # Determine significant peaks with a score threshold of 10
    python peakzilla.py -s 10 chip1.bed input1.bed > chip1_s10_peaks.tsv
    # Determine enriched regions with a score threshold of 2
    python peakzilla.py -s 2 chip2.bed input2.bed > chip2_s2_peaks.tsv
    # Overlap significant peaks from chip1 with enriched regions from chip2
    intersectBed -a chip1_s10_peaks.tsv -b chip2_s2_peaks.tsv > intersect_peaks.tsv

For example datasets as well as an example of a computational pipeline for the comparative analysis of ChIP-seq datasets, please refer to our
publication: Bardet AF et al. A computational pipeline for comparative ChIP-seq analyses. Nature Protocols (2011) 7(1):45-61 (http://www.starklab.org/data/bardet_natprotoc_2011/)

*OPTIONS*

One of peakzilla's design goals is to learn all the necessary information
from the data. The usage of the options should therefore not be required.

*OUTPUT FORMAT*

* Results are printed as a table of tab-delimited values to stdout
* Logs are appended to log.txt in the current directory or a custom directory/filename specified by the -l option
* Enriched regions in the control sample are written to negative_peaks.tsv when the -n option is set
* Columns represent Chromosome / Start / End / Name / Summit / Score / ChIP / Control / FoldEnrichment / DistributionScore / FDR (%)
    ]]></help>
    <citations>
        <citation type="doi">10.1038/nprot.2011.420</citation>
    </citations>
</tool>