view neat_genreads.xml @ 6:5bcfac4f8116 draft

planemo upload commit e96b43f96afce6a7b7dfd4499933aad7d05c955e-dirty
author thondeboer
date Tue, 15 May 2018 17:12:43 -0400
parents 110fc7a9a016
children 7d10b55965c9
line wrap: on
line source

<tool id="neat_genreads" name="NEAT-genReads" version="1.0.0" profile="16.04">
  <!-- Short summary of what the tool does. -->
  <description>is a fine-grained read simulator</description>
  <!-- Packages to be resolved for the job environment.
       NOTE(review): the command invokes "python2" but no python package is
       declared here; confirm the interpreter is provided some other way. -->
  <requirements>
    <requirement type="package">numpy</requirement>
  </requirements>
  <command detect_errors="exit_code">
    <![CDATA[
    ## Build the genReads.py invocation, then rename its prefixed result files to
    ## the fixed names (read1.fq, golden.bam, ...) that the outputs section
    ## picks up through from_work_dir.
    ## Paths and the free-text prefix are single-quoted for the shell.
    python2 '$__tool_directory__/genReads.py'
        ## Reference FASTA: data table entry path or history dataset.
        #if $in_type.input_type == "built-in":
          -r '${in_type.reference.fields.path}'
        #else:
          -r '${in_type.reference}'
        #end if
        -R $read_length
        -c $coverage
        ## Sequencing error model: a blank average rate adds no flag at all.
        #if $stats.error_model_cond.error_model == 'average_rate' and $stats.error_model_cond.error_rate != '':
          -E $stats.error_model_cond.error_rate
        #elif $stats.error_model_cond.error_model == 'error_model_file':
          -e '${stats.error_model_cond.error_file}'
        #end if
        ## Mutation model: average rate, model file or BED file.
        #if $stats.mut_rate_cond.mut_rate == 'average_rate' and $stats.mut_rate_cond.error_rate != '':
          -M $stats.mut_rate_cond.error_rate
        #elif $stats.mut_rate_cond.mut_rate == 'error_model_file':
          -m '${stats.mut_rate_cond.error_file}'
        #elif $stats.mut_rate_cond.mut_rate == 'error_model_bed':
          -Mb '${stats.mut_rate_cond.error_file_bed}'
        #end if
        #if $stats.gc_file:
          --gc-model '${stats.gc_file}'
        #end if
        ## Ploidy is only passed when it differs from 2.
        #if $stats.ploidy != 2 and $stats.ploidy > 0:
          -p $stats.ploidy
        #end if
        -o '${out_options.prefix}'
        #if $lib_type_cond.lib_type == "paired":
          #if $lib_type_cond.insert_mod_cond.insert_mod == 'fixed_insert':
            --pe $lib_type_cond.insert_mod_cond.frag_length $lib_type_cond.insert_mod_cond.frag_length_sd
          #else:
            --pe-model '${lib_type_cond.insert_mod_cond.insert_file}'
          #end if
        #end if
        ## Boolean switches expand to their flag text or to nothing.
        $out_options.golden_bam
        $out_options.golden_vcf
        #if $target_vcf_cond.target_vcf == "use_vcf":
          -v '${target_vcf_cond.target_file}'
        #end if
        #if $target_cond.target == "targeted":
          -t '${target_cond.target_file}'
          -to ${target_cond.off_target}
        #end if
        #if $stats.seed != "":
          --rng $stats.seed
        #end if
        $out_options.compress
        $out_options.bypass
        ## Post-processing: move the prefixed files to the fixed output names.
        #if $out_options.compress:
          #if not $out_options.bypass:
            && mv '${out_options.prefix}_read1.fq.gz' read1.fq.gz
            #if $lib_type_cond.lib_type == "paired":
              && mv '${out_options.prefix}_read2.fq.gz' read2.fq.gz
            #end if
          #end if
          #if $out_options.golden_bam != "":
            && mv '${out_options.prefix}_golden.bam' golden.bam
          #end if
          ## The VCF output is collected uncompressed, so it is gunzipped first.
          #if $out_options.golden_vcf != "":
            && gunzip '${out_options.prefix}_golden.vcf.gz' && mv '${out_options.prefix}_golden.vcf' golden.vcf
          #end if
        #else:
          #if not $out_options.bypass:
            && mv '${out_options.prefix}_read1.fq' read1.fq
            #if $lib_type_cond.lib_type == "paired":
              && mv '${out_options.prefix}_read2.fq' read2.fq
            #end if
          #end if
          #if $out_options.golden_bam != "":
            && mv '${out_options.prefix}_golden.bam' golden.bam
          #end if
          #if $out_options.golden_vcf != "":
            && mv '${out_options.prefix}_golden.vcf' golden.vcf
          #end if
        #end if
    ]]>
  </command>
  <inputs>

	<!-- Reference source: a FASTA from the "all_fasta" data table or a FASTA dataset from the history. -->
	<conditional name="in_type">
		<param name="input_type" type="select" label="Use a built-in FASTA file or one from the history?">
			<option value="built-in">Built-in</option>
			<option value="history">History file</option>
		</param>
		<when value="built-in">
			<param name="reference" type="select"
				label="Select a built-in reference sequence"
				help="The reference sequence that will be used as the basis for the simulated reads">
				<options from_data_table="all_fasta"/>
			</param>
		</when>
		<when value="history">
			<param name="reference" type="data" format="fasta"
				label="The reference sequence (FASTA format)"
				help="The reference sequence that will be used as the basis for the simulated reads"/>
		</when>
	</conditional>
	
	<!-- Length of each simulated read; passed to the command as -R. -->
	<param name="read_length" type="integer" value="101" size="4" min="30"
		label="Read length"
		help="The read length for the simulated reads [-R]"/>
	<!-- Average sequencing depth; passed to the command as -c. -->
	<param name="coverage" type="integer" value="10" size="4" min="1"
		label="Coverage"
		help="Average coverage across entire dataset [-c]"/>
	
	<!-- Library layout. Paired-end needs an insert-size description: a fixed mean/SD pair or a model file. -->
	<conditional name="lib_type_cond">
		<param name="lib_type" type="select" label="Single-end or paired-end sequencing library?">
			<option value="paired">Paired-end</option>
			<option value="single">Single-end</option>
		</param>
		<when value="paired">
			<conditional name="insert_mod_cond">
				<param name="insert_mod" type="select" label="Use fixed insert size or empirical model?">
					<option value="fixed_insert">Fixed insert size</option>
					<option value="model_insert">Use model file</option>
				</param>
				<!-- Both values are passed together as the two arguments of the pe option. -->
				<when value="fixed_insert">
					<param name="frag_length" type="integer" value="300" size="5" min="50"
						label="Fragment length"
						help="The average total size of the fragment"/>
					<param name="frag_length_sd" type="integer" value="30" size="4" min="0"
						label="Fragment length SD"
						help="The standard deviation for the average total size of the fragment"/>
				</when>
				<when value="model_insert">
					<param name="insert_file" type="data" format="txt" optional="false"
						label="Empirical insert size model file"
						help="Insert size model file, created by computeFraglen (python pickle format) [--pe-model]"/>
				</when>
			</conditional>
		</when>
		<!-- Single-end libraries take no further parameters; the empty case makes that explicit. -->
		<when value="single"/>
	</conditional>

	<!-- Optional restriction of the simulation to the regions of a BED file. -->
	<conditional name="target_cond">
		<param name="target" type="select" label="Limit the simulated reads to target area?">
			<option value="full">No - Use complete reference</option>
			<option value="targeted">Yes - Limit to target region</option>
		</param>
		<!-- Whole-reference simulation takes no further parameters. -->
		<when value="full"/>
		<when value="targeted">
			<param name="target_file" type="data" format="bed"
				label="Target region file"
				help="The simulated reads will primarily originate from the target region [-t]"/>
			<param name="off_target" type="float" value="0.02" min="0" max="1"
				label="Off-target coverage"
				help="The coverage outside the target region will be this fraction. 1-this will be on-target coverage [-to]"/>
		</when>
	</conditional>

	<!-- Optional VCF whose variants are passed to the simulator with -v. -->
	<conditional name="target_vcf_cond">
		<param name="target_vcf" type="select" label="Use a VCF file to seed the simulation?">
			<option value="no_vcf">No</option>
			<option value="use_vcf">Yes</option>
		</param>
		<!-- Without a seeding VCF there is nothing more to configure. -->
		<when value="no_vcf"/>
		<when value="use_vcf">
			<param name="target_file" type="data" format="vcf"
				label="Input VCF file"
				help="Variants from this VCF will be inserted into the simulated sequence with 100% certainty [-v]"/>
		</when>
	</conditional>

	<!-- Model settings: ploidy, sequencing-error model, mutation model, GC-bias model and random seed. -->
	<section name="stats" title="Miscellaneous settings" expanded="false"
		help="The settings for the error rates etc. for the model">
		<!-- The command only passes -p when this differs from 2. -->
		<param name="ploidy" type="integer" value="2" min="1" max="10" optional="false"
			label="Ploidy of the genome"
			help=""/>
		<conditional name="error_model_cond">
			<param name="error_model" type="select" label="Sequencing error rate model">
				<option value="average_rate">Set an average rate</option>
				<option value="error_model_file">Use an empirical model file</option>
			</param>
			<when value="average_rate">
				<!-- Optional: a blank value adds no -E flag to the command. -->
				<param name="error_rate" type="float" value="" min="0" max="0.3" optional="true"
					label="Average sequencing error rate (0-0.3)"
					help="Leave blank to use default. The sequencing error rate model is rescaled to make this the average value [-E]"/>
			</when>
			<when value="error_model_file">
				<param name="error_file" type="data" format="txt" optional="false"
					label="Empirical error rate model file"
					help="Error rate model file, created by genSeqErrorModel.py (python pickle format) [-e]"/>
			</when>
		</conditional>

		<conditional name="mut_rate_cond">
			<param name="mut_rate" type="select" label="Mutation rate model">
				<option value="average_rate">Set an average mutation rate</option>
				<option value="error_model_file">Use a PICKLE file with mutation rates</option>
				<option value="error_model_bed">Use a BED file with mutation rates</option>
			</param>
			<when value="average_rate">
				<!-- Optional: a blank value adds no -M flag to the command. -->
				<param name="error_rate" type="float" value="" min="0" max="0.3" optional="true"
					label="Average mutation rate (0-0.3)"
					help="Leave blank to use default. The mutation rate model is rescaled to make this the average value. These random mutations are inserted in addition to the ones specified in the -v option [-M]"/>
			</when>
			<when value="error_model_file">
				<param name="error_file" type="data" format="txt" optional="true"
					label="Mutation rate model file"
					help="Mutation rate model file, created by genMutModel (python pickle format) [-m]"/>
			</when>
			<when value="error_model_bed">
				<param name="error_file_bed" type="data" format="bed" optional="true"
					label="Mutation rate model BED file"
					help="Mutation rate model file, in BED format [-Mb]"/>
			</when>
		</conditional>

		<param name="gc_file" type="data" format="txt" optional="true"
			label="Empirical GC-bias model file"
			help="Empirical GC coverage bias distribution. Can be generated using computeGC (python pickle format) [--gc-model]"/>
		<param name="seed" type="integer" min="1" optional="true"
			label="Random seed"
			help="Identical seed values should produce identical runs, so things like read locations, variant positions, error positions etc. should all be the same [--rng]"/>

	</section>
	
	<!-- Output naming and switches. Each boolean expands to its flag text (or to nothing) in the command. -->
	<section name="out_options" title="Optional output options" expanded="false"
		help="Optional output options">

		<!-- Prefix of the files the simulator writes; also used in the output dataset labels. -->
		<param name="prefix" type="text" value="simulated_reads" size="40"
			label="Output prefix"
			help="Prefix for the name of the output simulated reads and other files [-o]"/>

		<param name="golden_vcf" type="boolean" checked="true" truevalue="--vcf" falsevalue=""
			label="Create the golden VCF file?"
			help="The golden VCF file will contain the golden truth for all variants created [--vcf]"/>
		<param name="golden_bam" type="boolean" checked="true" truevalue="--bam" falsevalue=""
			label="Create the golden BAM file?"
			help="The golden BAM file will contain the golden truth for all variants created [--bam]"/>
		<param name="compress" type="boolean" checked="true" truevalue="--gz" falsevalue=""
			label="Compress the FASTQ and VCF files with gzip? [--gz]"
			help=""/>
		<param name="bypass" type="boolean" checked="false" truevalue="--no-fastq" falsevalue=""
			label="Bypass generation of FASTQ files?"
			help="If checked, FASTQ files will not be created. Only BAM and VCF files will (if selected) [--no-fastq]"/>
	</section>
  </inputs>
  <outputs>
    <!-- Uncompressed FASTQ reads: only when neither compression nor FASTQ bypass is selected. -->
    <data name="out_file1" format="fastqsanger" from_work_dir="read1.fq"
          label="${out_options.prefix}_read1.fq" metadata_source="in_type.reference">
      <filter>not out_options['compress'] and not out_options['bypass']</filter>
    </data>
    <data name="out_file2" format="fastqsanger" from_work_dir="read2.fq"
          label="${out_options.prefix}_read2.fq" metadata_source="in_type.reference">
      <filter>not out_options['compress'] and not out_options['bypass'] and lib_type_cond['lib_type'] == 'paired'</filter>
    </data>
    <!-- Golden BAM and VCF: each is kept only when its switch is on. -->
    <data name="out_bam" format="bam" from_work_dir="golden.bam"
          label="${out_options.prefix}_golden.bam" metadata_source="in_type.reference">
      <filter>out_options['golden_bam']</filter>
    </data>
    <data name="out_vcf" format="vcf" from_work_dir="golden.vcf"
          label="${out_options.prefix}_golden.vcf" metadata_source="in_type.reference">
      <filter>out_options['golden_vcf']</filter>
    </data>
    <!-- Gzip-compressed FASTQ reads: only when compression is on and FASTQ bypass is off. -->
    <data name="out_file1gz" format="fastqsanger.gz" from_work_dir="read1.fq.gz"
          label="${out_options.prefix}_read1.fq.gz" metadata_source="in_type.reference">
      <filter>not out_options['bypass'] and out_options['compress']</filter>
    </data>
    <data name="out_file2gz" format="fastqsanger.gz" from_work_dir="read2.fq.gz"
          label="${out_options.prefix}_read2.fq.gz" metadata_source="in_type.reference">
      <filter>not out_options['bypass'] and lib_type_cond['lib_type'] == 'paired' and out_options['compress']</filter>
    </data>
  </outputs>
	<tests>
		
		<!-- Single-end run on chrMT.fa with seed 1; compares the uncompressed read1 FASTQ. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="single"/>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="false"/>
				<param name="golden_vcf" value="false"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT_read1.fq" compare="diff"/>
		</test>
		
		<!-- Paired-end run (fragment length 300, SD 30) with seed 1; compares both uncompressed FASTQ files. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="300"/>
					<param name="frag_length_sd" value="30"/>
				</conditional>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="false"/>
				<param name="golden_vcf" value="false"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT-PE_read1.fq" compare="diff"/>
			<output name="out_file2" file="chrMT-PE_read2.fq" compare="diff"/>
		</test>

		<!-- Paired-end run that also requests the golden BAM and VCF; all four outputs are compared. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="300"/>
					<param name="frag_length_sd" value="30"/>
				</conditional>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="true"/>
				<param name="golden_vcf" value="true"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT-PE-VCF-BAM_read1.fq" compare="diff"/>
			<output name="out_file2" file="chrMT-PE-VCF-BAM_read2.fq" compare="diff"/>
			<output name="out_bam" file="chrMT-PE-VCF-BAM.bam" compare="diff"/>
			<!-- Two differing lines are tolerated in the VCF comparison. -->
			<output name="out_vcf" file="chrMT-PE-VCF-BAM.vcf" compare="diff" lines_diff="2"/>
			<assert_stdout has_text="Writing output VCF..."/>
		</test>

		<!-- Paired-end run with golden BAM/VCF, seeded with an input VCF through target_vcf_cond. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="300"/>
					<param name="frag_length_sd" value="30"/>
				</conditional>
			</conditional>
			<conditional name="target_vcf_cond">
				<param name="target_vcf" value="use_vcf"/>
				<param name="target_file" value="chrMT-PE-VCF-BAM.vcf"/>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="true"/>
				<param name="golden_vcf" value="true"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT-PE-VCF-BAM-vcf_read1.fq" compare="diff"/>
			<output name="out_file2" file="chrMT-PE-VCF-BAM-vcf_read2.fq" compare="diff"/>
			<output name="out_bam" file="chrMT-PE-VCF-BAM-vcf.bam" compare="diff"/>
			<!-- Two differing lines are tolerated in the VCF comparison. -->
			<output name="out_vcf" file="chrMT-PE-VCF-BAM-vcf.vcf" compare="diff" lines_diff="2"/>
			<assert_stdout has_text="Writing output VCF..."/>
		</test>

		<!-- Paired-end run with golden BAM/VCF, limited to the regions of a BED file with 0.02 off-target coverage. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="300"/>
					<param name="frag_length_sd" value="30"/>
				</conditional>
			</conditional>
			<conditional name="target_cond">
				<param name="target" value="targeted"/>
				<param name="target_file" value="chrMT-Targets.bed"/>
				<param name="off_target" value="0.02"/>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="true"/>
				<param name="golden_vcf" value="true"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT-PE-VCF-BAM-Targeted_read1.fq" compare="diff"/>
			<output name="out_file2" file="chrMT-PE-VCF-BAM-Targeted_read2.fq" compare="diff"/>
			<output name="out_bam" file="chrMT-PE-VCF-BAM-Targeted.bam" compare="diff"/>
			<!-- Two differing lines are tolerated in the VCF comparison. -->
			<output name="out_vcf" file="chrMT-PE-VCF-BAM-Targeted.vcf" compare="diff" lines_diff="2"/>
			<assert_stdout has_text="Writing output VCF..."/>
		</test>

		<!-- Paired-end run with gzip compression enabled; only the BAM and VCF outputs are compared. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="101"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="300"/>
					<param name="frag_length_sd" value="30"/>
				</conditional>
			</conditional>
			<section name="stats">
				<param name="seed" value="1"/>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="true"/>
				<param name="golden_vcf" value="true"/>
				<param name="compress" value="true"/>
			</section>
			<!-- The decompress does not seem to work 
			<output name="out_file1" file="chrMT-PE-VCF-BAM-gz_read1.fq.gz" compare="diff" decompress="true"/>
			<output name="out_file2" file="chrMT-PE-VCF-BAM-gz_read2.fq.gz" compare="diff" decompress="true"/>
			-->
			<output name="out_bam" file="chrMT-PE-VCF-BAM-gz.bam" compare="diff"/>
			<!-- Two differing lines are tolerated in the VCF comparison. -->
			<output name="out_vcf" file="chrMT-PE-VCF-BAM-gz.vcf" compare="diff" lines_diff="2"/>
			<assert_stdout has_text="Writing output VCF..."/>
		</test>

		<!-- Non-default settings: read length 151, coverage 20, fragment 500/50, ploidy 3, seed 123,
		     and explicit average sequencing-error and mutation rates. -->
		<test>
			<conditional name="in_type">
				<param name="input_type" value="history"/>
				<param name="reference" value="chrMT.fa" format="fasta"/>
			</conditional>
			<param name="read_length" value="151"/>
			<param name="coverage" value="20"/>
			<conditional name="lib_type_cond">
				<param name="lib_type" value="paired"/>
				<conditional name="insert_mod_cond">
					<param name="insert_mod" value="fixed_insert"/>
					<param name="frag_length" value="500"/>
					<param name="frag_length_sd" value="50"/>
				</conditional>
			</conditional>
			<!-- Both rate conditionals are defined inside the "stats" section of the tool inputs,
			     so they are nested here as well; this keeps the two "error_rate" values unambiguous. -->
			<section name="stats">
				<param name="seed" value="123"/>
				<param name="ploidy" value="3"/>
				<conditional name="error_model_cond">
					<param name="error_model" value="average_rate"/>
					<param name="error_rate" value="0.123"/>
				</conditional>
				<conditional name="mut_rate_cond">
					<param name="mut_rate" value="average_rate"/>
					<param name="error_rate" value="0.123"/>
				</conditional>
			</section>
			<section name="out_options">
				<param name="prefix" value="out"/>
				<param name="golden_bam" value="true"/>
				<param name="golden_vcf" value="true"/>
				<param name="compress" value="false"/>
			</section>
			<output name="out_file1" file="chrMT-PE-VCF-BAM-panic_read1.fq" compare="diff"/>
			<output name="out_file2" file="chrMT-PE-VCF-BAM-panic_read2.fq" compare="diff"/>
			<output name="out_bam" file="chrMT-PE-VCF-BAM-panic.bam" compare="diff"/>
			<!-- Two differing lines are tolerated in the VCF comparison. -->
			<output name="out_vcf" file="chrMT-PE-VCF-BAM-panic.vcf" compare="diff" lines_diff="2"/>
			<assert_stdout has_text="Writing output VCF..."/>
		</test>


	</tests>
</tool>