view DC_Genotyper.xml @ 4:92207f2ec144 draft

Various Fixes and added tool_data_conf
author geert-vandeweyer
date Thu, 25 Sep 2014 06:01:11 -0400
parents fa8566bd4a51
children 5c72a91ce518
line wrap: on
line source

<tool id="DC_Genotyper" name="DC Genotyper" version='0.0.1'>
	<description></description>
	<!-- External packages this tool declares as dependencies.
	     Every entry names a fixed version except twoBitToFa, which asks for "latest". -->
	<requirements>
		<requirement type="package" version="3.0.2">R</requirement>
		<requirement type="package" version="0.1.18">samtools</requirement>
		<requirement type="package" version="0.2.6">tabix</requirement>
		<requirement type="package" version="latest">twoBitToFa</requirement>
		<requirement type="package" version="1.92">perl_module_threads</requirement>
		<requirement type="package" version="1.46">perl_module_threads_shared</requirement>
		<requirement type="package" version="3.02">perl_module_Thread_Queue</requirement>
		<requirement type="package" version="2.3.32">igvtools</requirement>
	</requirements>
	<!-- Command line for DC_Genotyper.pl, run through the perl interpreter.
	     Flags and the values they receive:
	       -t  enrichment BED file (param "targets")
	       -b  sample BAM file (param "bamfile")
	       -R  path column of the selected DC_Genotyper_indexes entry (param "ref")
	       -p  GALAXY_SLOTS from the job environment, 4 when unset or empty; the
	           backslash leaves the dollar sign for the shell instead of Cheetah
	       -s  dbSNP file: the history dataset, or the path of the built-in entry
	       -m  minimal coverage depth (param "mincov")
	       -P  expected sample ploidy (param "ploidy")
	       -a  output1 (allele fraction distributions)
	       -v  output2 (VCF file)
	     Every substituted value is double-quoted so the shell passes it as one argument. -->
	<command interpreter="perl">DC_Genotyper.pl
		-t "$targets"
		-b "$bamfile"
		-R "${ref.fields.path}"
		-p "\${GALAXY_SLOTS:-4}"
		#if $dbsnp.source == "history":
			-s "${dbsnp.ownFile}"
		#else
			-s "${dbsnp.indices.fields.path}"
		#end if
		-m "$mincov"
		-P "$ploidy"
		-a "$output1"
		-v "$output2"
	</command>

	<!-- User-facing parameters of the tool form. -->
	<inputs>
		<param name="bamfile" type="data" format="bam" label="Sample BAM file" />
		<param name="targets" type="data" format="bed" label="Enrichment BED file" />
		<!-- Reference genome, chosen from the DC_Genotyper_indexes data table. -->
		<param name="ref" type="select" label="Select a reference genome">
			<options from_data_table="DC_Genotyper_indexes">
				<filter type="sort_by" column="2" />
				<validator type="no_options" message="No indexes are available" />
			</options>
		</param>
		<!-- dbSNP source: a built-in entry from the dbsnp_indexes data table (default)
		     or a VCF/BCF dataset taken from the history. -->
		<conditional name="dbsnp">
			<param name="source" type="select" label="Will you select a dbSNP file from your history, or use a built-in version (which is faster)?">
				<option value="indexed">Use a built-in version</option>
				<option value="history">Use one from the history</option>
			</param>
			<when value="indexed">
				<!-- The label names dbSNP explicitly so this select is not confused with
				     the reference genome selector above. -->
				<param name="indices" type="select" label="Select a built-in dbSNP file">
					<options from_data_table="dbsnp_indexes">
						<filter type="sort_by" column="2" />
						<validator type="no_options" message="No dbSNP indexes are available" />
					</options>
				</param>
			</when>
			<when value="history">
				<param name="ownFile" type="data" format="vcf,bcf" label="Select a dbSNP file from history" />
			</when>
		</conditional>
		<param name="mincov" type="integer" value="400" label="Minimal Coverage Depth" />
		<param name="ploidy" type="integer" value="10" label="Expected Sample Ploidy" />
	</inputs>
	
	<!-- Result datasets: output1 is handed to the script through -a,
	     output2 through -v (see the command section). -->
	<outputs>
		<data name="output1" format="txt" label="${tool.name} on ${on_string}: Allele Fraction Distributions" />
		<data name="output2" format="vcf" label="${tool.name} on ${on_string}: VCF file" />
	</outputs>
<help>

**What it does**

	1. get allele counts on all positions in specified targets (bed) using igvtools. Only SNPs !!
	2. remove known dbsnp positions (bcf file)
	3. Get distribution of background noise (pcr/sequencing errors), by modelling allele fractions as normal distributions.
	4. Based on these distributions, check each position for significant change from the reference allele (based on allele fraction)
	5. For aberrant positions, check each alternate allele to see if it passes the background signal. 
	6. Generate VCF file. 


**Information**

This tool was created by Geert Vandeweyer. It is a very early version with several limitations. Current limitations are: no support for indels, no plotting of the noise models, and incorrect syntax for multi-allelic sites in the VCF file.

Any feedback is welcome. 

</help>
</tool>