Mercurial > repos > mpaya > epic2
diff epic2/epic2_wrapper.xml @ 2:601ad3ea888b draft
Uploaded
author | mpaya |
---|---|
date | Wed, 08 May 2019 08:11:56 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/epic2/epic2_wrapper.xml Wed May 08 08:11:56 2019 -0400 @@ -0,0 +1,372 @@ +<tool id="epic2" name="epic2" version="@VERSION@.0"> + <description>peak calling of broad ChIP-Seq marks</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + + <stdio> + <exit_code range="1:125" level="fatal" description="Unknown error occurred" /> + <exit_code range="130:" level="fatal" description="Unknown error occurred" /> + <regex match="epic2: (command ){0,1}not found" source="stderr" level="fatal" description="The epic2 python package is not properly installed, contact Galaxy administrators" /> + </stdio> + + <command><![CDATA[ + + ##set up treatment files, extension must be bed, bedpe, bam, sam + #set $t_file_list = list() + #if str($treatment.t_multi_select) == "No": + #if $treatment.input_treatment_file.is_of_type('bed') + #set $t_file = 'ChIP_file.bed' + ln -s '$treatment.input_treatment_file' $t_file && + #elif $treatment.input_treatment_file.is_of_type('bam') + #set $t_file = 'ChIP_file.bam' + ln -s '$treatment.input_treatment_file' $t_file && + ln -s '$treatment.input_treatment_file.metadata.bam_index' ${t_file}.bai && + #elif $treatment.input_treatment_file.is_of_type('sam') + #set $t_file = 'ChIP_file.sam' + ln -s '$treatment.input_treatment_file' $t_file && + #end if + $t_file_list.append($t_file) + #else + #set $inputs = $treatment.input_treatment_file + #for $i, $f in enumerate($inputs) + #if $f.is_of_type('bed') + #set $t_file = ''.join(['ChIP_file_',str($i),'.bed']) + ln -s '$f' $t_file && + #elif $f.is_of_type('bam') + #set $t_file = ''.join(['ChIP_file_',str($i),'.bam']) + ln -s '$f' $t_file && + ln -s '$f.metadata.bam_index' ${t_file}.bai && + #elif $f.is_of_type('sam') + #set $t_file = ''.join(['ChIP_file_',str($i),'.sam']) + ln -s '$f' $t_file && + #end if + $t_file_list.append($t_file) + #end for + #end if + + ##set up control files, extension must be bed, bedpe, bam, sam + #if str($control.c_select) == "Yes": + #set $c_file_list = list() + #if str($control.c_multiple.c_multi_select) == "No": + #set $f = $control.c_multiple.input_control_file + #if $f.is_of_type('bed') + #set $c_file = 'control_file.bed' + ln -s '$f' $c_file && + #elif $f.is_of_type('bam') + #set $c_file = 'control_file.bam' + ln -s '$f' $c_file && + ln -s '$f.metadata.bam_index' ${c_file}.bai && + #elif $f.is_of_type('sam') + #set $c_file = 'control_file.sam' + ln -s '$f' $c_file && + #end if + $c_file_list.append($c_file) + #else + #set $inputs = $control.c_multiple.input_control_file + #for $i, $f in enumerate($inputs) + #if $f.is_of_type('bed') + #set $c_file = ''.join(['control_file',str($i),'.bed']) + ln -s '$f' $c_file && + #elif $f.is_of_type('bam') + #set $c_file = ''.join(['control_file',str($i),'.bam']) + ln -s '$f' $c_file && + ln -s '$f.metadata.bam_index' ${c_file}.bai && + #elif $f.is_of_type('sam') + #set $c_file = ''.join(['control_file',str($i),'.sam']) + ln -s '$f' $c_file && + #end if + $c_file_list.append($c_file) + #end for + #end if + #end if + + epic2 + + ## Treatment File(s) + -t ${ ' '.join( $t_file_list ) } + + ## Control File(s) + #if str($control.c_select) == "Yes": + -c ${ ' '.join( $c_file_list ) } + #end if + + ## Predefined or Custom Genome + #if str($genome.g_select) == "Yes": + --genome ${genome.builtin_genome} + #else + #if str($genome.chromsizes.chr_select) == "No": + #if $genome.chromsizes.cs_file.is_of_type('fasta'): + --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next} + {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.cs_file}') + #else + --chromsizes ${genome.chromsizes.cs_file} + #end if + #else + #if $genome.chromsizes.builtin_fasta.fields.path + --chromsizes <(awk '/^>/ {if (seqlen) print seqlen;printf substr($1,2) "\t";seqlen=0;next} + {seqlen+=length($0)}END{print seqlen}' '${genome.chromsizes.builtin_fasta.fields.path}') + #end if + #end if + #end if + #if $genome.egf: + --effective-genome-fraction ${genome.egf} + #end if + + #if $fdr: + -fdr $fdr + #end if + + ## BAM OPTIONS + #if $bam_options.required_flag: + --required-flag $bam_options.required_flag + #end if + + #if $bam_options.filter_flag: + --filter-flag $bam_options.filter_flag + #end if + + #if $bam_options.mapq: + --mapq $bam_options.mapq + #end if + + #if $bam_options.autodetect_chroms: + --autodetect-chroms + #end if + + #if $bam_options.discard_chroms: + --discard-chromosomes-pattern $bam_options.discard_chroms + #end if + + ## ADVANCED OPTIONS + #if $advanced_options.keep_dupes: + --keep-duplicates + #end if + + #if $advanced_options.bin_size: + --bin-size $advanced_options.bin_size + #end if + + #if $advanced_options.gaps_allowed: + --gaps-allowed $advanced_options.gaps_allowed + #end if + + #if $advanced_options.fragment_size: + --fragment-size $advanced_options.fragment_size + #end if + + #if $advanced_options.original_algorithm: + --original-algorithm + #end if + + > ${peaks} + 2> >(awk 'NF' >&2) + + #if $to_bed: + && + awk 'NR>1{if ($4==0) {pv=50;qv=50}else{pv=-log($4)/log(10);qv=-log($9)/log(10)}; + print $1,$2,$3,"island_"NR-1,int($5),$6,$10,pv,qv}' OFS="\t" ${peaks} > ${bed_peaks} + #end if + + ]]></command> + + <inputs> + <conditional name="treatment"> + <param name="t_multi_select" type="select" label="Are you pooling Treatment Files?" help="" > + <option value="No" selected="True">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" > + <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed" label="ChIP-Seq Treatment File" help="(-t)" /> + </when> + <when value="Yes"> + <param name="input_treatment_file" argument="-t" type="data" format="bam,sam,bed" multiple="true" + label="ChIP-Seq Treatment File" help="(-t)" /> + </when> + </conditional> + + <conditional name="control"> + <param name="c_select" type="select" label="Do you have a Control File?" > + <option value="Yes">Yes</option> + <option value="No" selected="True">No</option> + </param> + <when value="Yes"> + <conditional name="c_multiple"> + <param name="c_multi_select" type="select" label="Are you pooling Control Files?" help="" > + <option value="No" selected="True">No</option> + <option value="Yes">Yes</option> + </param> + <when value="No" > + <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed" label="ChIP-Seq Control File" help="(-c)" /> + </when> + <when value="Yes"> + <param name="input_control_file" argument="-c" type="data" format="bam,sam,bed" multiple="true" + label="ChIP-Seq Control File" help="(-c)" /> + </when> + </conditional> + </when> + <when value="No"> + <param name="evalue" argument="-e" type="integer" optional="True" label="e-value" + help="The E-value controls the genome-wide error rate of identified islands under the random + background assumption. Should be used when not using a control library. Default 1000." /> + </when> + </conditional> + + <conditional name="genome"> + <param name="g_select" type="select" label="Is your genome indexed?" > + <option value="Yes" selected="True">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"> + <expand macro="effectiveGenomeSize" /> + <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True" label="Effective genome fraction" + help="Use a different effective genome fraction than the one included in epic2, which depends on genome and readlength. (-egf)" /> + </when> + <when value="No"> + <conditional name="chromsizes"> + <param name="chr_select" type="select" label="Do you want to use an indexed fasta file?" + help="Chromosome sizes will be calculated from the provided fasta file." > + <option value="No">No</option> + <option value="Yes" selected="True">Yes</option> + </param> + <when value="No" > + <param name="cs_file" argument="--chromsizes" type="data" format="fasta,txt,tabular,tsv" label="Chromosome sizes" + help="Provide a fasta file for automated calculation, or a tab-separated file with + two columns: chromosome names and sizes. (--chromsizes)" /> + </when> + <when value="Yes"> + <param name="builtin_fasta" argument="--chromsizes" type="select" optional="True" label="Genome for fasta file" help="(--chromsizes)" > + <options from_data_table="fasta_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + </conditional> + <param name="egf" argument="-egf" type="float" min="0" max="1" optional="True" label="Effective genome fraction" + help="The effective genome fraction is the proportion of the genome that is mappable, excluding Ns. (-egf)" /> + </when> + </conditional> + + <param name="fdr" argument="-fdr" type="float" min="0" max="1" optional="True" label="False discovery rate cutoff" + help="Remove all islands with an FDR above cutoff. Default 0.05 (-fdr)" /> + + <param name="to_bed" type="boolean" checked="false" label="Print output in bed format?"/> + + <section name="bam_options" title="BAM Options"> + <param name="required_flag" argument="--required-flag" type="integer" optional="True" label="Required flag" + help="Keep reads with these bits set in flag. Same as `samtools view -f`. Default 0. (--required-flag)" /> + <param name="filter_flag" argument="--filter-flag" type="integer" optional="True" label="Filter flag" + help="Discard reads with these bits set in flag. Same as `samtools view -F`. Default 1540 (hex: 0x604). (--filter-flag)" /> + <param name="mapq" argument="--mapq" type="integer" optional="True" label="Mapping quality" + help="Discard reads with mapping quality lower than this. Default 5. (--mapq)" /> + <param name="autodetect_chroms" type="boolean" checked="false" truevalue="--autodetect-chroms" falsevalue="" label="Autodetect chromosomes?" + help="Autodetect chromosomes from bam file. Use with --discard-chromosomes flag to avoid non-canonical chromosomes. (--autodetect-chroms)" /> + <param name="discard_chroms" argument="--discard-chromosomes-pattern" type="text" optional="True" label="Discard chromosomes pattern" + help="Discard reads from chromosomes matching + this pattern. Default '_'. Note that if you are not + interested in the results from non-canonical + chromosomes, you should ensure they are removed with + this flag, otherwise they will make the statistical + analysis too stringent. (--discard-chromosomes-pattern)"/> + </section> + + <section name="advanced_options" title="Advanced Options"> + <param name="keep_dupes" type="boolean" checked="false" truevalue="--keep-duplicates" falsevalue="" label="Keep duplicates?" + help="Keep reads mapping to the same position on the same strand within a library. (--keep-duplicates)" /> + <param name="bin_size" argument="--bin-size" type="integer" optional="True" label="Bin size" + help="Size of the windows to scan the genome. BIN-SIZE is the smallest possible island. Default 200. (--bin-size)" /> + <param name="gaps_allowed" argument="--gaps-allowed" type="integer" optional="True" label="Gaps allowed" + help="This number is multiplied by the window size to determine the number of gaps (ineligible windows) allowed + between two eligible windows. Default 3. (--gaps-allowed)"/> + <param name="fragment_size" argument="--fragment-size" type="integer" optional="True" label="Fragment size" + help="(Single end reads only) Size of the sequenced fragment. Each read is extended half the fragment size from the 5' end. + Default 150 (i.e. extend by 75). (--fragment-size)" /> + <param name="original_algorithm" type="boolean" checked="false" truevalue="--original-algorithm" falsevalue="" + label="Compute p-values with SICER original algorithm?" + help="Use the original SICER algorithm, without the epic2 fix. This will use all reads in your files to compute + the p-values, including those falling outside the genome boundaries. (--original-algorithm)" /> + </section> + </inputs> + + <outputs> + <data format="tabular" name="peaks" label="${tool.name} on ${on_string}"/> + <data format='bed' name='bed_peaks' label="${tool.name} on ${on_string}: BED"> + <filter>to_bed</filter> + </data> + + </outputs> + + <tests> + <test> + <param name="input_treatment_file" value="test.bam" ftype="bam" /> + <param name="c_select" value="Yes" /> + <param name="input_control_file" value="control.bam" ftype="bam"/> + <output name="peaks" file="epic2_results.txt"/> + </test> + <test> + <param name="input_treatment_file" value="test.bed.gz" ftype="bed" /> + <param name="c_select" value="Yes" /> + <param name="input_control_file" value="control.bed.gz" ftype="bed"/> + <output name="peaks" file="epic2_results1.txt"/> + </test> + <test> + <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" /> + <param name="c_select" value="Yes" /> + <param name="input_control_file" value="test_Input.bam" ftype="bam"/> + <param name="g_select" value="No" /> + <param name="chr_select" value="No" /> + <param name="cs_file" value="test_chromsizes.txt" /> + <param name="egf" value="0.99" /> + <param name="original_algorithm" value="Yes" /> + <output name="peaks" file="epic2_results2.txt"/> + </test> + <test> + <param name="input_treatment_file" value="test_ChIP.bam" ftype="bam" /> + <param name="c_select" value="Yes" /> + <param name="input_control_file" value="test_Input.bam" ftype="bam"/> + <param name="g_select" value="No" /> + <param name="chr_select" value="No" /> + <param name="cs_file" value="test_fasta.fasta" /> + <param name="egf" value="0.99" /> + <param name="to_bed" value="Yes" /> + <param name="mapq" value="10" /> + <param name="bin_size" value="100" /> + <param name="gaps_allowed" value="0" /> + <output name="peaks" file="epic2_results3.txt"/> + <output name="bed_peaks" file="epic2_results3.bed"/> + </test> + </tests> + + <help> +Chip-Seq broad peak/domain finder based on SICER. + +**Accepted input formats** + +Input file extension must be bed, bedpe, bam or sam. + +**Broad peaks format** + +Original epic2 output does not follow any standard format, then broad peaks bed format is offered. This format follows the standard from ENCODE, BED 6 + 3, and contains the following columns: + + * **1.** Chrom + * **2.** Start + * **3.** End + * **4.** Name + * **5.** Score + * **6.** Strand + * **7.** log2FoldChange + * **8.** -log10PValue + * **9.** -log10FDR + +.. class:: warningmark + + On columns 8 and 9, the max value is set to 50 when Pvalue == 0.0. + +Tool adapted to Galaxy by Miriam PayĆ” Milans. Original documentation on https://github.com/biocore-ntnu/epic2. + + + </help> + <expand macro="citations" /> +</tool>