Mercurial > repos > mpaya > epic2
view epic2/epic2_wrapper.xml @ 2:601ad3ea888b draft
Uploaded
author | mpaya |
---|---|
date | Wed, 08 May 2019 08:11:56 -0400 |
parents | |
children |
line wrap: on
line source
<!--
  Galaxy wrapper for epic2, a peak caller for broad ChIP-Seq marks.

  The command template:
    1. symlinks every treatment / control dataset into the job directory under
       a name ending in .bed, .bam or .sam (BAM files also get a .bai link);
    2. runs epic2 with the options selected in the form, writing the island
       table to the "peaks" output;
    3. optionally converts that table to a BED 6 + 3 file ("bed_peaks") with awk.

  Tokens, requirements and citations come from macros.xml.
-->
<tool id="epic2" name="epic2" version="@VERSION@.0">
    <description>peak calling of broad ChIP-Seq marks</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <stdio>
        <!-- Exit codes 126 to 129 are deliberately not listed as fatal. -->
        <exit_code range="1:125" level="fatal" description="Unknown error occurred" />
        <exit_code range="130:" level="fatal" description="Unknown error occurred" />
        <regex match="epic2: (command ){0,1}not found"
               source="stderr"
               level="fatal"
               description="The epic2 python package is not properly installed, contact Galaxy administrators" />
    </stdio>
    <command><![CDATA[
        ## ---------------------------------------------------------------
        ## Treatment files.
        ## Each dataset is linked under a name that carries its datatype
        ## extension (bed, bam or sam); BAM inputs also get their index
        ## linked next to them as "<link name>.bai".
        ## "#silent" keeps the return value of list.append() out of the
        ## generated command line.
        ## ---------------------------------------------------------------
        #set $t_file_list = list()
        #if str($treatment.t_multi_select) == "No"
            #set $f = $treatment.input_treatment_file
            #if $f.is_of_type('bed')
                #set $t_file = 'ChIP_file.bed'
                ln -s '$f' '$t_file' &&
            #elif $f.is_of_type('bam')
                #set $t_file = 'ChIP_file.bam'
                ln -s '$f' '$t_file' &&
                ln -s '$f.metadata.bam_index' '${t_file}.bai' &&
            #elif $f.is_of_type('sam')
                #set $t_file = 'ChIP_file.sam'
                ln -s '$f' '$t_file' &&
            #end if
            #silent $t_file_list.append($t_file)
        #else
            #set $inputs = $treatment.input_treatment_file
            #for $i, $f in enumerate($inputs)
                #if $f.is_of_type('bed')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.bed'])
                    ln -s '$f' '$t_file' &&
                #elif $f.is_of_type('bam')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.bam'])
                    ln -s '$f' '$t_file' &&
                    ln -s '$f.metadata.bam_index' '${t_file}.bai' &&
                #elif $f.is_of_type('sam')
                    #set $t_file = ''.join(['ChIP_file_',str($i),'.sam'])
                    ln -s '$f' '$t_file' &&
                #end if
                #silent $t_file_list.append($t_file)
            #end for
        #end if

        ## ---------------------------------------------------------------
        ## Control files (only when the user supplies a control library).
        ## Same linking scheme as for the treatment files.
        ## ---------------------------------------------------------------
        #if str($control.c_select) == "Yes"
            #set $c_file_list = list()
            #if str($control.c_multiple.c_multi_select) == "No"
                #set $f = $control.c_multiple.input_control_file
                #if $f.is_of_type('bed')
                    #set $c_file = 'control_file.bed'
                    ln -s '$f' '$c_file' &&
                #elif $f.is_of_type('bam')
                    #set $c_file = 'control_file.bam'
                    ln -s '$f' '$c_file' &&
                    ln -s '$f.metadata.bam_index' '${c_file}.bai' &&
                #elif $f.is_of_type('sam')
                    #set $c_file = 'control_file.sam'
                    ln -s '$f' '$c_file' &&
                #end if
                #silent $c_file_list.append($c_file)
            #else
                #set $inputs = $control.c_multiple.input_control_file
                #for $i, $f in enumerate($inputs)
                    #if $f.is_of_type('bed')
                        #set $c_file = ''.join(['control_file',str($i),'.bed'])
                        ln -s '$f' '$c_file' &&
                    #elif $f.is_of_type('bam')
                        #set $c_file = ''.join(['control_file',str($i),'.bam'])
                        ln -s '$f' '$c_file' &&
                        ln -s '$f.metadata.bam_index' '${c_file}.bai' &&
                    #elif $f.is_of_type('sam')
                        #set $c_file = ''.join(['control_file',str($i),'.sam'])
                        ln -s '$f' '$c_file' &&
                    #end if
                    #silent $c_file_list.append($c_file)
                #end for
            #end if
        #end if

        epic2

            ## Treatment file(s)
            -t ${ ' '.join( $t_file_list ) }

            ## Control file(s), or the e-value threshold when running without
            ## a control library. The e-value input only exists in the
            ## "no control" branch of the form.
            #if str($control.c_select) == "Yes"
                -c ${ ' '.join( $c_file_list ) }
            #elif str($control.evalue) != ''
                -e $control.evalue
            #end if

            ## Predefined or custom genome.
            ## For fasta input the chromosome sizes are computed on the fly by
            ## awk and handed to epic2 through process substitution.
            #if str($genome.g_select) == "Yes"
                --genome '${genome.builtin_genome}'
            #else
                #if str($genome.chromsizes.chr_select) == "No"
                    #if $genome.chromsizes.cs_file.is_of_type('fasta')
                        --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next} {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.cs_file}')
                    #else
                        --chromsizes '${genome.chromsizes.cs_file}'
                    #end if
                #else
                    #if $genome.chromsizes.builtin_fasta.fields.path
                        --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next} {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.builtin_fasta.fields.path}')
                    #end if
                #end if
            #end if

            ## Optional numeric inputs are tested with str(...) != '' instead
            ## of plain truthiness so that an explicit 0 is still forwarded.
            #if str($genome.egf) != ''
                --effective-genome-fraction $genome.egf
            #end if
            #if str($fdr) != ''
                -fdr $fdr
            #end if

            ## BAM OPTIONS
            #if str($bam_options.required_flag) != ''
                --required-flag $bam_options.required_flag
            #end if
            #if str($bam_options.filter_flag) != ''
                --filter-flag $bam_options.filter_flag
            #end if
            #if str($bam_options.mapq) != ''
                --mapq $bam_options.mapq
            #end if
            ## Boolean inputs render as their flag (truevalue) or as nothing.
            $bam_options.autodetect_chroms
            #if str($bam_options.discard_chroms) != ''
                --discard-chromosomes-pattern '$bam_options.discard_chroms'
            #end if

            ## ADVANCED OPTIONS
            $advanced_options.keep_dupes
            #if str($advanced_options.bin_size) != ''
                --bin-size $advanced_options.bin_size
            #end if
            #if str($advanced_options.gaps_allowed) != ''
                --gaps-allowed $advanced_options.gaps_allowed
            #end if
            #if str($advanced_options.fragment_size) != ''
                --fragment-size $advanced_options.fragment_size
            #end if
            $advanced_options.original_algorithm

            ## Island table goes to the "peaks" dataset; blank lines are
            ## stripped from stderr before it is passed on.
            > '${peaks}'
            2> >(awk 'NF' >&2)

        ## Optional conversion of the island table to BED 6 + 3.
        ## The header line is skipped; when column 4 is 0 both -log10 columns
        ## are set to 50 instead of taking the logarithm of zero.
        #if $to_bed
            && awk 'NR>1{if ($4==0) {pv=50;qv=50}else{pv=-log($4)/log(10);qv=-log($9)/log(10)}; print $1,$2,$3,"island_"NR-1,int($5),$6,$10,pv,qv}' OFS="\t" '${peaks}' > '${bed_peaks}'
        #end if
    ]]></command>
    <inputs>
        <!-- Treatment: a single dataset, or several datasets pooled together. -->
        <conditional name="treatment">
            <param name="t_multi_select" type="select" label="Are you pooling Treatment Files?" help="">
                <option value="No" selected="True">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="No">
                <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed"
                       label="ChIP-Seq Treatment File" help="(-t)" />
            </when>
            <when value="Yes">
                <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed" multiple="true"
                       label="ChIP-Seq Treatment File" help="(-t)" />
            </when>
        </conditional>

        <!-- Control: optional; without a control the e-value threshold can be set instead. -->
        <conditional name="control">
            <param name="c_select" type="select" label="Do you have a Control File?">
                <option value="Yes">Yes</option>
                <option value="No" selected="True">No</option>
            </param>
            <when value="Yes">
                <conditional name="c_multiple">
                    <param name="c_multi_select" type="select" label="Are you pooling Control Files?" help="">
                        <option value="No" selected="True">No</option>
                        <option value="Yes">Yes</option>
                    </param>
                    <when value="No">
                        <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed"
                               label="ChIP-Seq Control File" help="(-c)" />
                    </when>
                    <when value="Yes">
                        <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed" multiple="true"
                               label="ChIP-Seq Control File" help="(-c)" />
                    </when>
                </conditional>
            </when>
            <when value="No">
                <param name="evalue" argument="-e" type="integer" optional="True"
                       label="e-value"
                       help="The E-value controls the genome-wide error rate of identified islands under the random background assumption. Should be used when not using a control library. Default 1000." />
            </when>
        </conditional>

        <!-- Genome: one of the builds known to epic2, or chromosome sizes supplied by the user. -->
        <conditional name="genome">
            <param name="g_select" type="select" label="Is your genome indexed?">
                <option value="Yes" selected="True">Yes</option>
                <option value="No">No</option>
            </param>
            <when value="Yes">
                <expand macro="effectiveGenomeSize" />
                <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True"
                       label="Effective genome fraction"
                       help="Use a different effective genome fraction than the one included in epic2, which depends on genome and readlength. (-egf)" />
            </when>
            <when value="No">
                <conditional name="chromsizes">
                    <param name="chr_select" type="select"
                           label="Do you want to use an indexed fasta file?"
                           help="Chromosome sizes will be calculated from the provided fasta file.">
                        <option value="No">No</option>
                        <option value="Yes" selected="True">Yes</option>
                    </param>
                    <when value="No">
                        <param name="cs_file" argument="--chromsizes" type="data" format="fasta,txt,tabular,tsv"
                               label="Chromosome sizes"
                               help="Provide a fasta file for automated calculation, or a tab-separated file with two columns: chromosome names and sizes. (--chromsizes)" />
                    </when>
                    <when value="Yes">
                        <param name="builtin_fasta" argument="--chromsizes" type="select" optional="True"
                               label="Genome for fasta file" help="(--chromsizes)">
                            <options from_data_table="fasta_indexes">
                                <filter type="sort_by" column="2" />
                                <validator type="no_options" message="No indexes are available" />
                            </options>
                        </param>
                    </when>
                </conditional>
                <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True"
                       label="Effective genome fraction"
                       help="The effective genome fraction is the proportion of the genome that is mappable, excluding Ns. (-egf)" />
            </when>
        </conditional>

        <param name="fdr" argument="-fdr" type="float" min="0" max="1" optional="True"
               label="False discovery rate cutoff"
               help="Remove all islands with an FDR above cutoff. Default 0.05 (-fdr)" />
        <param name="to_bed" type="boolean" checked="false" label="Print output in bed format?" />

        <section name="bam_options" title="BAM Options">
            <param name="required_flag" argument="--required-flag" type="integer" optional="True"
                   label="Required flag"
                   help="Keep reads with these bits set in flag. Same as `samtools view -f`. Default 0. (--required-flag)" />
            <param name="filter_flag" argument="--filter-flag" type="integer" optional="True"
                   label="Filter flag"
                   help="Discard reads with these bits set in flag. Same as `samtools view -F`. Default 1540 (hex: 0x604). (--filter-flag)" />
            <param name="mapq" argument="--mapq" type="integer" optional="True"
                   label="Mapping quality"
                   help="Discard reads with mapping quality lower than this. Default 5. (--mapq)" />
            <param name="autodetect_chroms" type="boolean" checked="false"
                   truevalue="--autodetect-chroms" falsevalue=""
                   label="Autodetect chromosomes?"
                   help="Autodetect chromosomes from bam file. Use with --discard-chromosomes flag to avoid non-canonical chromosomes. (--autodetect-chroms)" />
            <param name="discard_chroms" argument="--discard-chromosomes-pattern" type="text" optional="True"
                   label="Discard chromosomes pattern"
                   help="Discard reads from chromosomes matching this pattern. Default '_'. Note that if you are not interested in the results from non-canonical chromosomes, you should ensure they are removed with this flag, otherwise they will make the statistical analysis too stringent. (--discard-chromosomes-pattern)" />
        </section>

        <section name="advanced_options" title="Advanced Options">
            <param name="keep_dupes" type="boolean" checked="false"
                   truevalue="--keep-duplicates" falsevalue=""
                   label="Keep duplicates?"
                   help="Keep reads mapping to the same position on the same strand within a library. (--keep-duplicates)" />
            <param name="bin_size" argument="--bin-size" type="integer" optional="True"
                   label="Bin size"
                   help="Size of the windows to scan the genome. BIN-SIZE is the smallest possible island. Default 200. (--bin-size)" />
            <param name="gaps_allowed" argument="--gaps-allowed" type="integer" optional="True"
                   label="Gaps allowed"
                   help="This number is multiplied by the window size to determine the number of gaps (ineligible windows) allowed between two eligible windows. Default 3. (--gaps-allowed)" />
            <param name="fragment_size" argument="--fragment-size" type="integer" optional="True"
                   label="Fragment size"
                   help="(Single end reads only) Size of the sequenced fragment. Each read is extended half the fragment size from the 5' end. Default 150 (i.e. extend by 75). (--fragment-size)" />
            <param name="original_algorithm" type="boolean" checked="false"
                   truevalue="--original-algorithm" falsevalue=""
                   label="Compute p-values with SICER original algorithm?"
                   help="Use the original SICER algorithm, without the epic2 fix. This will use all reads in your files to compute the p-values, including those falling outside the genome boundaries. (--original-algorithm)" />
        </section>
    </inputs>
    <outputs>
        <data format="tabular" name="peaks" label="${tool.name} on ${on_string}" />
        <!-- Only created when the BED conversion was requested. -->
        <data format="bed" name="bed_peaks" label="${tool.name} on ${on_string}: BED">
            <filter>to_bed</filter>
        </data>
    </outputs>
    <tests>
        <!-- BAM treatment and control, default genome. -->
        <test>
            <param name="input_treatment_file" value="test.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="control.bam" ftype="bam" />
            <output name="peaks" file="epic2_results.txt" />
        </test>
        <!-- BED treatment and control, default genome. -->
        <test>
            <param name="input_treatment_file" value="test.bed.gz" ftype="bed" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="control.bed.gz" ftype="bed" />
            <output name="peaks" file="epic2_results1.txt" />
        </test>
        <!-- Custom chromosome sizes table, original SICER algorithm. -->
        <test>
            <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="test_Input.bam" ftype="bam" />
            <param name="g_select" value="No" />
            <param name="chr_select" value="No" />
            <param name="cs_file" value="test_chromsizes.txt" />
            <param name="egf" value="0.99" />
            <param name="original_algorithm" value="Yes" />
            <output name="peaks" file="epic2_results2.txt" />
        </test>
        <!--
          Chromosome sizes derived from a fasta file, plus BED conversion.
          NOTE(review): gaps_allowed is 0 here and the command template now
          forwards explicit zeros. The expected files must correspond to a run
          with zero gaps; regenerate them if they were produced while the
          value was being dropped.
        -->
        <test>
            <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" />
            <param name="c_select" value="Yes" />
            <param name="input_control_file" value="test_Input.bam" ftype="bam" />
            <param name="g_select" value="No" />
            <param name="chr_select" value="No" />
            <param name="cs_file" value="test_fasta.fasta" />
            <param name="egf" value="0.99" />
            <param name="to_bed" value="Yes" />
            <param name="mapq" value="10" />
            <param name="bin_size" value="100" />
            <param name="gaps_allowed" value="0" />
            <output name="peaks" file="epic2_results3.txt" />
            <output name="bed_peaks" file="epic2_results3.bed" />
        </test>
    </tests>
    <help>
Chip-Seq broad peak/domain finder based on SICER.

**Accepted input formats**

Input file extension must be bed, bedpe, bam or sam.

**Broad peaks format**

Original epic2 output does not follow any standard format, then broad peaks bed format is offered. This format follows the standard from ENCODE, BED 6 + 3, and contains the following columns:

* **1.** Chrom
* **2.** Start
* **3.** End
* **4.** Name
* **5.** Score
* **6.** Strand
* **7.** log2FoldChange
* **8.** -log10PValue
* **9.** -log10FDR

.. class:: warningmark

On columns 8 and 9, the max value is set to 50 when Pvalue == 0.0.

Tool adapted to Galaxy by Miriam Payá Milans. Original documentation on https://github.com/biocore-ntnu/epic2.
    </help>
    <expand macro="citations" />
</tool>