Mercurial > repos > thondeboer > neat_genreads
view neat_genreads.xml @ 10:7d10b55965c9 draft default tip
planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author | thondeboer |
---|---|
date | Wed, 16 May 2018 17:02:51 -0400 |
parents | 5bcfac4f8116 |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper for NEAT-genReads.

    Runs genReads.py (shipped in the tool directory) under python2 to simulate
    sequencing reads from a reference FASTA, then renames the prefixed files
    that genReads.py writes to the fixed names the <outputs> section collects
    via from_work_dir (read1.fq[.gz], read2.fq[.gz], golden.bam, golden.vcf).

    NOTE(review): the generated command line changed in this revision
    (mutation model arguments, quoting); consider bumping the tool version.
-->
<tool id="neat_genreads" name="NEAT-genReads" version="1.0.0" profile="16.04">
    <description>is a fine-grained read simulator</description>
    <requirements>
        <requirement type="package">numpy</requirement>
    </requirements>
    <command detect_errors="exit_code">
    <![CDATA[
        python2 '$__tool_directory__/genReads.py'

        ## Reference: path from the fasta_indexes data table, or a history dataset.
        #if $in_type.input_type == "built-in":
            -r '${in_type.reference.fields.path}'
        #else:
            -r '${in_type.reference}'
        #end if

        -R $read_length
        -c $coverage

        ## Sequencing error model: a blank average rate leaves the genReads.py default in place.
        #if $stats.error_model_cond.error_model == 'average_rate' and $stats.error_model_cond.error_rate != '':
            -E $stats.error_model_cond.error_rate
        #elif $stats.error_model_cond.error_model == 'error_model_file':
            -e '${stats.error_model_cond.error_file}'
        #end if

        ## Mutation model. Both model files are optional inputs, so the flag is
        ## only emitted when a dataset was actually selected.
        #if $stats.mut_rate_cond.mut_rate == 'average_rate' and $stats.mut_rate_cond.error_rate != '':
            -M $stats.mut_rate_cond.error_rate
        #elif $stats.mut_rate_cond.mut_rate == 'error_model_file' and $stats.mut_rate_cond.error_file:
            -m '${stats.mut_rate_cond.error_file}'
        #elif $stats.mut_rate_cond.mut_rate == 'error_model_bed' and $stats.mut_rate_cond.error_file_bed:
            -Mb '${stats.mut_rate_cond.error_file_bed}'
        #end if

        #if $stats.gc_file:
            --gc-model '${stats.gc_file}'
        #end if

        ## Ploidy is only passed when it differs from the form default of 2.
        #if $stats.ploidy != 2 and $stats.ploidy > 0:
            -p $stats.ploidy
        #end if

        -o "${out_options.prefix}"

        #if $lib_type_cond.lib_type == "paired":
            #if $lib_type_cond.insert_mod_cond.insert_mod == 'fixed_insert':
                --pe $lib_type_cond.insert_mod_cond.frag_length $lib_type_cond.insert_mod_cond.frag_length_sd
            #else:
                --pe-model '${lib_type_cond.insert_mod_cond.insert_file}'
            #end if
        #end if

        ## Boolean params expand to their flag (--bam / --vcf) or to nothing.
        $out_options.golden_bam
        $out_options.golden_vcf

        #if $target_vcf_cond.target_vcf == "use_vcf":
            -v '${target_vcf_cond.target_file}'
        #end if

        #if $target_cond.target == "targeted":
            -t '${target_cond.target_file}'
            -to ${target_cond.off_target}
        #end if

        #if $stats.seed != "":
            --rng $stats.seed
        #end if

        ## Boolean params expand to their flag (--gz / --no-fastq) or to nothing.
        $out_options.compress
        $out_options.bypass

        ## Move the prefixed result files to the fixed names used by from_work_dir.
        #if $out_options.compress:
            #if not $out_options.bypass:
                && mv "${out_options.prefix}_read1.fq.gz" read1.fq.gz
                #if $lib_type_cond.lib_type == "paired":
                    && mv "${out_options.prefix}_read2.fq.gz" read2.fq.gz
                #end if
            #end if
            #if $out_options.golden_bam != "":
                && mv "${out_options.prefix}_golden.bam" golden.bam
            #end if
            ## The VCF output is declared as plain vcf, so it is decompressed first.
            #if $out_options.golden_vcf != "":
                && gunzip "${out_options.prefix}_golden.vcf.gz"
                && mv "${out_options.prefix}_golden.vcf" golden.vcf
            #end if
        #else:
            #if not $out_options.bypass:
                && mv "${out_options.prefix}_read1.fq" read1.fq
                #if $lib_type_cond.lib_type == "paired":
                    && mv "${out_options.prefix}_read2.fq" read2.fq
                #end if
            #end if
            #if $out_options.golden_bam != "":
                && mv "${out_options.prefix}_golden.bam" golden.bam
            #end if
            #if $out_options.golden_vcf != "":
                && mv "${out_options.prefix}_golden.vcf" golden.vcf
            #end if
        #end if
    ]]>
    </command>
    <inputs>
        <!-- Reference sequence: built-in (data table) or a FASTA from the history -->
        <conditional name="in_type">
            <param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
                <option value="built-in">Built-in</option>
                <option value="history">History file</option>
            </param>
            <when value="built-in">
                <param name="reference" type="select" label="Select a built-in reference sequence" help="The reference sequence that will be used as the basis for the simulated reads">
                    <options from_data_table="fasta_indexes" />
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta" label="The reference sequence (FASTA format)" help="The reference sequence that will be used as the basis for the simulated reads" />
            </when>
        </conditional>

        <param name="read_length" type="integer" value="101" size="4" min="30" label="Read length" help="The read length for the simulated reads [-R]" />
        <param name="coverage" type="integer" value="10" size="4" min="1" label="Coverage" help="Average coverage across entire dataset [-c]" />

        <!-- Library layout; paired-end needs either a fixed insert size or a model file -->
        <conditional name="lib_type_cond">
            <param name="lib_type" type="select" label="Single-end or paired-end sequencing library?" >
                <option value="paired">Paired-end</option>
                <option value="single">Single-end</option>
            </param>
            <when value="paired">
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" type="select" label="Use fixed insert size or empirical model?" >
                        <option value="fixed_insert">Fixed insert size</option>
                        <option value="model_insert">Use model file</option>
                    </param>
                    <when value="fixed_insert">
                        <param name="frag_length" type="integer" value="300" size="5" min="50" label="Fragment length" help="The average total size of the fragment" />
                        <param name="frag_length_sd" type="integer" value="30" size="4" min="0" label="Fragment length SD" help="The standard deviation for the average total size of the fragment" />
                    </when>
                    <when value="model_insert">
                        <param name="insert_file" optional="false" type="data" format="txt" label="Empirical insert size model file" help="Insert size model file, created by computeFraglen (python pickle format) [--pe-model]" />
                    </when>
                </conditional>
            </when>
            <when value="single" />
        </conditional>

        <!-- Optional restriction of the simulation to target regions -->
        <conditional name="target_cond">
            <param name="target" type="select" label="Limit the simulated reads to target area?" >
                <option value="full">No - Use complete reference</option>
                <option value="targeted">Yes - Limit to target region</option>
            </param>
            <when value="full" />
            <when value="targeted">
                <param name="target_file" type="data" format="bed" label="Target region file" help="The simulated reads will primarily originate from the target region [-t]" />
                <param name="off_target" type="float" value="0.02" min="0" max="1" label="Off-target coverage" help="The coverage outside the target region will be this fraction. 1-this will be on-target coverage [-to]" />
            </when>
        </conditional>

        <!-- Optional VCF whose variants are inserted into the simulated sequence -->
        <conditional name="target_vcf_cond">
            <param name="target_vcf" type="select" label="Use a VCF file to seed the simulation?" >
                <option value="no_vcf">No</option>
                <option value="use_vcf">Yes</option>
            </param>
            <when value="no_vcf" />
            <when value="use_vcf">
                <param name="target_file" type="data" format="vcf" label="Input VCF file" help="Variants from this VCF will be inserted into the simulated sequence with 100% certainty [-v]" />
            </when>
        </conditional>

        <section name="stats" title="Miscellaneous settings" expanded="false" help="The settings for the error rates etc. for the model" >
            <param name="ploidy" type="integer" value="2" min="1" max="10" optional="false" label="Ploidy of the genome" help="" />
            <conditional name="error_model_cond">
                <param name="error_model" type="select" label="Sequencing error rate model">
                    <option value="average_rate">Set an average rate</option>
                    <option value="error_model_file">Use an empirical model file</option>
                </param>
                <when value="average_rate">
                    <param name="error_rate" optional="true" type="float" value="" min="0" max="0.3" label="Average sequencing error rate (0-0.3)" help="Leave blank to use default. The sequencing error rate model is rescaled to make this the average value [-E]" />
                </when>
                <when value="error_model_file">
                    <param name="error_file" optional="false" type="data" format="txt" label="Empirical error rate model file" help="Error rate model file, created by genSeqErrorModel.py (python pickle format) [-e]" />
                </when>
            </conditional>
            <conditional name="mut_rate_cond">
                <param name="mut_rate" type="select" label="Mutation rate model">
                    <option value="average_rate">Set an average mutation rate</option>
                    <option value="error_model_file">Use a PICKLE file with mutation rates</option>
                    <option value="error_model_bed">Use a BED file with mutation rates</option>
                </param>
                <when value="average_rate">
                    <param name="error_rate" optional="true" type="float" value="" min="0" max="0.3" label="Average mutation rate (0-0.3)" help="Leave blank to use default. The mutation rate model is rescaled to make this the average value. These random mutations are inserted in addition to the ones specified in the -v option [-M]" />
                </when>
                <when value="error_model_file">
                    <param name="error_file" optional="true" type="data" format="txt" label="Mutation rate model file" help="Mutation rate model file, created by genMutModel (python pickle format) [-m]" />
                </when>
                <when value="error_model_bed">
                    <param name="error_file_bed" optional="true" type="data" format="bed" label="Mutation rate model BED file" help="Mutation rate model file, in BED format [-Mb]" />
                </when>
            </conditional>
            <param name="gc_file" optional="true" type="data" format="txt" label="Empirical GC-bias model file" help="Empirical GC coverage bias distribution. Can be generated using computeGC (python pickle format) [--gc-model]" />
            <param name="seed" type="integer" min="1" optional="true" label="Random seed" help="Identical seed values should produce identical runs, so things like read locations, variant positions, error positions etc. should all be the same [--rng]" />
        </section>

        <section name="out_options" title="Optional output options" expanded="false" help="Optional output options" >
            <param name="prefix" type="text" label="Output prefix" value="simulated_reads" size="40" help="Prefix for the name of the output simulated reads and other files [-o]" />
            <param name="golden_vcf" type="boolean" checked="true" truevalue="--vcf" falsevalue="" label="Create the golden VCF file?" help="The golden VCF file will contain the golden truth for all variants created [--vcf]" />
            <param name="golden_bam" type="boolean" checked="true" truevalue="--bam" falsevalue="" label="Create the golden BAM file?" help="The golden BAM file will contain the golden truth for all variants created [--bam]" />
            <param name="compress" type="boolean" checked="true" truevalue="--gz" falsevalue="" label="Compress the FASTQ and VCF files with gzip? [--gz]" help="" />
            <param name="bypass" type="boolean" checked="false" truevalue="--no-fastq" falsevalue="" label="Bypass generation of FASTQ files?" help="If checked, FASTQ files will not be created. Only BAM and VCF files will (if selected) [--no-fastq]" />
        </section>
    </inputs>
    <outputs>
        <!-- Uncompressed FASTQ outputs -->
        <data format="fastqsanger" name="out_file1" from_work_dir="read1.fq" label="${out_options.prefix}_read1.fq" metadata_source="in_type.reference">
            <filter>not out_options['compress'] and not out_options['bypass']</filter>
        </data>
        <data format="fastqsanger" name="out_file2" from_work_dir="read2.fq" label="${out_options.prefix}_read2.fq" metadata_source="in_type.reference">
            <filter>not out_options['compress'] and not out_options['bypass'] and lib_type_cond['lib_type'] == 'paired'</filter>
        </data>
        <!-- Golden truth outputs -->
        <data format="bam" name="out_bam" from_work_dir="golden.bam" label="${out_options.prefix}_golden.bam" metadata_source="in_type.reference">
            <filter>out_options['golden_bam']</filter>
        </data>
        <data format="vcf" name="out_vcf" from_work_dir="golden.vcf" label="${out_options.prefix}_golden.vcf" metadata_source="in_type.reference">
            <filter>out_options['golden_vcf']</filter>
        </data>
        <!-- Gzip-compressed FASTQ outputs -->
        <data format="fastqsanger.gz" name="out_file1gz" from_work_dir="read1.fq.gz" label="${out_options.prefix}_read1.fq.gz" metadata_source="in_type.reference">
            <filter>not out_options['bypass'] and out_options['compress']</filter>
        </data>
        <data format="fastqsanger.gz" name="out_file2gz" from_work_dir="read2.fq.gz" label="${out_options.prefix}_read2.fq.gz" metadata_source="in_type.reference">
            <filter>not out_options['bypass'] and lib_type_cond['lib_type'] == 'paired' and out_options['compress']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end, history reference, FASTQ only -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="single"/>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="false"/>
                <param name="golden_vcf" value="false"/>
                <param name="compress" value="false"/>
            </section>
            <output name="out_file1" file="chrMT_read1.fq" compare="diff"/>
        </test>
        <!-- Paired-end, fixed insert size, FASTQ only -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="300"/>
                    <param name="frag_length_sd" value="30"/>
                </conditional>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="false"/>
                <param name="golden_vcf" value="false"/>
                <param name="compress" value="false"/>
            </section>
            <output name="out_file1" file="chrMT-PE_read1.fq" compare="diff"/>
            <output name="out_file2" file="chrMT-PE_read2.fq" compare="diff"/>
        </test>
        <!-- Paired-end with golden BAM and VCF -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="300"/>
                    <param name="frag_length_sd" value="30"/>
                </conditional>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="true"/>
                <param name="golden_vcf" value="true"/>
                <param name="compress" value="false"/>
            </section>
            <output name="out_file1" file="chrMT-PE-VCF-BAM_read1.fq" compare="diff"/>
            <output name="out_file2" file="chrMT-PE-VCF-BAM_read2.fq" compare="diff"/>
            <output name="out_bam" file="chrMT-PE-VCF-BAM.bam" compare="diff"/>
            <output name="out_vcf" file="chrMT-PE-VCF-BAM.vcf" compare="diff" lines_diff="2"/>
            <!-- NOTE(review): here and in the tests below, assert_stdout carries has_text as an
                 attribute instead of a nested has_text child element, so this assertion is
                 presumably never evaluated. Confirm against the Galaxy schema before changing. -->
            <assert_stdout has_text="Writing output VCF..."/>
        </test>
        <!-- Paired-end seeded with an input VCF -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="300"/>
                    <param name="frag_length_sd" value="30"/>
                </conditional>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="true"/>
                <param name="golden_vcf" value="true"/>
                <param name="compress" value="false"/>
            </section>
            <conditional name="target_vcf_cond">
                <param name="target_vcf" value="use_vcf"/>
                <param name="target_file" value="chrMT-PE-VCF-BAM.vcf"/>
            </conditional>
            <output name="out_file1" file="chrMT-PE-VCF-BAM-vcf_read1.fq" compare="diff"/>
            <output name="out_file2" file="chrMT-PE-VCF-BAM-vcf_read2.fq" compare="diff"/>
            <output name="out_bam" file="chrMT-PE-VCF-BAM-vcf.bam" compare="diff"/>
            <output name="out_vcf" file="chrMT-PE-VCF-BAM-vcf.vcf" compare="diff" lines_diff="2"/>
            <assert_stdout has_text="Writing output VCF..."/>
        </test>
        <!-- Paired-end limited to target regions -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="300"/>
                    <param name="frag_length_sd" value="30"/>
                </conditional>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="true"/>
                <param name="golden_vcf" value="true"/>
                <param name="compress" value="false"/>
            </section>
            <conditional name="target_cond">
                <param name="target" value="targeted"/>
                <param name="target_file" value="chrMT-Targets.bed"/>
                <param name="off_target" value="0.02"/>
            </conditional>
            <output name="out_file1" file="chrMT-PE-VCF-BAM-Targeted_read1.fq" compare="diff"/>
            <output name="out_file2" file="chrMT-PE-VCF-BAM-Targeted_read2.fq" compare="diff"/>
            <output name="out_bam" file="chrMT-PE-VCF-BAM-Targeted.bam" compare="diff"/>
            <output name="out_vcf" file="chrMT-PE-VCF-BAM-Targeted.vcf" compare="diff" lines_diff="2"/>
            <assert_stdout has_text="Writing output VCF..."/>
        </test>
        <!-- Paired-end with gzip compression enabled -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="300"/>
                    <param name="frag_length_sd" value="30"/>
                </conditional>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="true"/>
                <param name="golden_vcf" value="true"/>
                <param name="compress" value="true"/>
            </section>
            <!-- The decompress does not seem to work
            <output name="out_file1" file="chrMT-PE-VCF-BAM-gz_read1.fq.gz" compare="diff" decompress="true"/>
            <output name="out_file2" file="chrMT-PE-VCF-BAM-gz_read2.fq.gz" compare="diff" decompress="true"/>
            -->
            <output name="out_bam" file="chrMT-PE-VCF-BAM-gz.bam" compare="diff"/>
            <output name="out_vcf" file="chrMT-PE-VCF-BAM-gz.vcf" compare="diff" lines_diff="2"/>
            <assert_stdout has_text="Writing output VCF..."/>
        </test>
        <!-- Paired-end with non-default rates, ploidy, read length and coverage -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="history"/>
                <param name="reference" value="chrMT.fa" format="fasta"/>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="paired"/>
                <conditional name="insert_mod_cond">
                    <param name="insert_mod" value="fixed_insert"/>
                    <param name="frag_length" value="500"/>
                    <param name="frag_length_sd" value="50"/>
                </conditional>
            </conditional>
            <conditional name="error_model_cond">
                <param name="error_model" value="average_rate"/>
                <param name="error_rate" value="0.123"/>
            </conditional>
            <conditional name="mut_rate_cond">
                <param name="mut_rate" value="average_rate"/>
                <param name="error_rate" value="0.123"/>
            </conditional>
            <section name="stats">
                <param name="seed" value="123"/>
                <param name="ploidy" value="3"/>
            </section>
            <param name="read_length" value="151"/>
            <param name="coverage" value="20"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="true"/>
                <param name="golden_vcf" value="true"/>
                <param name="compress" value="false"/>
            </section>
            <output name="out_file1" file="chrMT-PE-VCF-BAM-panic_read1.fq" compare="diff"/>
            <output name="out_file2" file="chrMT-PE-VCF-BAM-panic_read2.fq" compare="diff"/>
            <output name="out_bam" file="chrMT-PE-VCF-BAM-panic.bam" compare="diff"/>
            <output name="out_vcf" file="chrMT-PE-VCF-BAM-panic.vcf" compare="diff" lines_diff="2"/>
            <assert_stdout has_text="Writing output VCF..."/>
        </test>
        <!-- Single-end using a built-in reference from the data table -->
        <test>
            <conditional name="in_type">
                <param name="input_type" value="built-in"/>
                <param name="reference" value="hg19chrmt" format="fasta">
                    <options from_data_table="fasta_indexes" />
                </param>
            </conditional>
            <conditional name="lib_type_cond">
                <param name="lib_type" value="single"/>
            </conditional>
            <section name="stats">
                <param name="seed" value="1"/>
            </section>
            <param name="read_length" value="101"/>
            <section name="out_options">
                <param name="prefix" value="out"/>
                <param name="golden_bam" value="false"/>
                <param name="golden_vcf" value="false"/>
                <param name="compress" value="false"/>
            </section>
            <output name="out_file1" file="chrMT_read1.fq" compare="diff"/>
        </test>
    </tests>
</tool>