view tools/regVariation/quality_filter.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

<tool id="qualityFilter" name="Filter nucleotides" version="1.0.1">
  <description> based on quality scores</description>
  <!-- Command line for quality_filter.py. Positional arguments, in order:
       input MAF, output MAF, primary species list, mask species list,
       score cut-off, mask character option value, mask region option value,
       neighbor length(s), path to quality_scores.loc.
       The length slot carries "right,left" for region 3, the literal 1 for
       region 0, and the single length value otherwise.
       Paths and species lists are single-quoted so that spaces or shell
       metacharacters cannot split or alter the argument list. -->
  <command interpreter="python">
    quality_filter.py
    '$input'
    '$out_file1'
    '$primary_species'
    '$mask_species'
    $score
    $mask_char
    ${mask_region.region}
    #if $mask_region.region == "3"
      ${mask_region.lengthr},${mask_region.lengthl}
    #elif $mask_region.region == "0"
      1
    #else
      ${mask_region.length}
    #end if
    '${GALAXY_DATA_INDEX_DIR}/quality_scores.loc'
  </command>
  <inputs>
    <!-- MAF alignment to be filtered. -->
    <param name="input" type="data" format="maf" label="Select data"/>

    <!-- Both species selectors are populated from the "species" metadata key of the chosen input. -->
    <param name="primary_species" type="select" multiple="true" display="checkboxes"
           label="Use quality scores of">
      <options>
        <filter type="data_meta" ref="input" key="species"/>
      </options>
    </param>
    <param name="mask_species" type="select" multiple="true" display="checkboxes"
           label="Mask Species">
      <options>
        <filter type="data_meta" ref="input" key="species"/>
      </options>
    </param>

    <!-- NOTE(review): this help text says "less than or equal to", while the example in the
         tool help says "less than"; confirm against quality_filter.py which one is correct. -->
    <param name="score" type="integer" value="20" size="10"
           label="Quality score cut-off"
           help="Cut-off value of 20 means mask all nucleotides having quality score less than or equal to 20"/>

    <!-- The option value (0 to 5), not the character itself, is what reaches the command line. -->
    <param name="mask_char" type="select" size="5" label="Mask character">
      <option value="0" selected="true">#</option>
      <option value="1">$</option>
      <option value="2">^</option>
      <option value="3">*</option>
      <option value="4">?</option>
      <option value="5">N</option>
    </param>

    <!-- The chosen region decides which neighbor-length parameters are offered:
         none for 0, "length" for 1 and 2, "lengthr" plus "lengthl" for 3. -->
    <conditional name="mask_region">
      <param name="region" type="select" label="Mask region">
        <option value="0" selected="true">Only the corresponding nucleotide </option>
        <option value="1">Corresponding column + right-side neighbors</option>
        <option value="2">Corresponding column + left-side neighbors</option>
        <option value="3">Corresponding column + neighbors on both sides</option>
      </param>
      <when value="0"/>
      <when value="1">
        <param name="length" type="integer" value="2" size="10" label="Number of right-side neighbors"/>
      </when>
      <when value="2">
        <param name="length" type="integer" value="2" size="10" label="Number of left-side neighbors"/>
      </when>
      <when value="3">
        <param name="lengthr" type="integer" value="2" size="10" label="Number of neighbors on right-side"/>
        <param name="lengthl" type="integer" value="2" size="10" label="Number of neighbors on left-side"/>
      </when>
    </conditional>
  </inputs>
  <!-- Single MAF output; its metadata_source is the input dataset. -->
  <outputs>
    <data name="out_file1" format="maf" metadata_source="input"/>
  </outputs>
  <!-- Declares the numpy Python module as a requirement of this tool. -->
  <requirements>
    <requirement type="python-module">numpy</requirement>
  </requirements>
  <tests>
    <!-- Masks hg18 using panTro2 quality scores with cut-off 50, mask character
         option 0 and mask region option 0; the result is compared with
         6_quality_filter.maf. -->
    <test>
      <param name="input"           value="6.maf"/>
      <param name="primary_species" value="panTro2"/>
      <param name="mask_species"    value="hg18"/>
      <param name="score"           value="50"/>
      <param name="mask_char"       value="0"/>
      <param name="region"          value="0"/>
      <output name="out_file1" file="6_quality_filter.maf"/>
    </test>
  </tests>
 <help> 

.. class:: infomark

**What it does**

This tool takes a MAF file as input and filters nucleotides in every alignment block of the MAF file based on their quality/PHRED scores. 

-----

.. class:: warningmark

**Note**

Any blocks not containing the primary species (the species whose quality scores are to be used) will be omitted.
Also, any primary species whose quality scores are not available in Galaxy will be treated as a non-primary species. This information will appear as a message in the job history panel.

-----

**Example**

- For the following alignment block::

   a score=4050.0
   s hg18.chrX    3719221 48 - 154913754 tattttacatttaaaataaatatgtaaatatatattttatatttaaaa 
   s panTro2.chrX 3560945 48 - 155361357 tattttatatttaaaataaagatgtaaatatatattttatatttaaaa 

- running this tool with **Primary species as panTro2**, **Mask species as hg18, panTro2**, **Quality cutoff as 20**, **Mask character as #** and **Mask region as only the corresponding position** will return::

   a score=4050.0
   s hg18.chrX    3719221 48 - 154913754 ###tttac#####a###a#atatgtaaat###tattt#####ttaaaa 
   s panTro2.chrX 3560945 48 - 155361357 ###tttat#####a###a#agatgtaaat###tattt#####ttaaaa 
   
where the positions containing # represent panTro2 nucleotides having quality scores less than 20.
  </help>  
</tool>