view tools/vcf_tools/extract.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

<tool id="vcf_extract" name="Extract" version="1.0.0">
  <description>reads from a specified region</description>
  <!--
    Command line for the vcfPytools "extract" sub-command.
    Each optional text parameter is forwarded only when it is non-blank
    after stripping whitespace. $pass_filter expands to the boolean
    parameter's truevalue or falsevalue (the pass-filter flag or nothing).
    Substituted values are double-quoted so that a path or value containing
    whitespace reaches the script as a single shell word.
    NOTE(review): $keep_quality is deliberately left unquoted. Its help text
    says it carries two arguments (for example "90 ge"), so presumably the
    script reads them as two separate words; confirm before quoting it.
  -->
  <command interpreter="python">
    vcfPytools.py
      extract
      --in="$input1"
      --out="$output1"
      #if $reference_sequence.value.strip()
        --reference-sequence="$reference_sequence"
      #end if
      #if $region.value.strip()
        --region="$region"
      #end if
      #if $keep_quality.value.strip()
        --keep-quality=$keep_quality
      #end if
      #if $keep_info.value.strip()
        --keep-info="$keep_info"
      #end if
      #if $discard_info.value.strip()
        --discard-info="$discard_info"
      #end if
      $pass_filter
  </command>
  <inputs>
    <!-- Form parameters; each name is referenced as a $variable in the command template. -->
    <param name="input1"
           type="data"
           format="vcf"
           label="VCF file"/>
    <param name="reference_sequence"
           type="text"
           value=""
           label="Extract records from this reference sequence"/>
    <param name="region"
           type="text"
           value=""
           label="Extract records from this region"
           help="The format of the region is ref:start..end, where the start and end coordinates are 1-based"/>
    <param name="keep_quality"
           type="text"
           value=""
           label="Keep records containing this quality"
           help="This requires two arguments: the quality value and a logical operator (eq - equals, le - less than or equal to, lt - less than, ge - greater than or equal to , gt - greater than) to determine which records to keep.  For example: '90 ge' will retain all records that have a quality of 90 or greater"/>
    <param name="keep_info"
           type="text"
           value=""
           label="Keep records containing this info field"/>
    <param name="discard_info"
           type="text"
           value=""
           label="Discard records containing this info field"/>
    <!-- Boolean rendered straight into the command line: the flag when checked, nothing otherwise. -->
    <param name="pass_filter"
           type="boolean"
           truevalue="--pass-filter"
           falsevalue=""
           checked="False"
           label="Discard records whose filter field is not PASS"/>
  </inputs>
  <tests>
    <!--
      Functional tests. Every expected output refers to "output1", the name
      of the only dataset declared in the outputs section of this tool.
    -->
    <!-- Only the PASS filter switch is enabled; up to 6 differing lines are tolerated. -->
    <test>
      <param name="input1" value="test_filter_quality_9_DP_2000_lt.vcf" ftype="vcf" />
      <param name="reference_sequence" value="" />
      <param name="region" value="" />
      <param name="keep_quality" value="" />
      <param name="keep_info" value="" />
      <param name="discard_info" value="" />
      <param name="pass_filter" value="true" />
      <output name="output1" file="test_extract_pass_filter_quality_9_DP_2000_lt.vcf" lines_diff="6" ftype="vcf" />
    </test>
    <!-- Region extraction only. -->
    <test>
      <param name="input1" value="test.small.vcf" ftype="vcf" />
      <param name="reference_sequence" value="" />
      <param name="region" value="20:80000..100000" />
      <param name="keep_quality" value="" />
      <param name="keep_info" value="" />
      <param name="discard_info" value="" />
      <param name="pass_filter" value="false" />
      <output name="output1" file="test_extract_region_80000_100000.vcf" ftype="vcf" />
    </test>
    <!-- Quality filter only: quality value 90 with the "ge" operator. -->
    <test>
      <param name="input1" value="test.small.vcf" ftype="vcf" />
      <param name="reference_sequence" value="" />
      <param name="region" value="" />
      <param name="keep_quality" value="90 ge" />
      <param name="keep_info" value="" />
      <param name="discard_info" value="" />
      <param name="pass_filter" value="false" />
      <output name="output1" file="test_extract_quality_90_ge.vcf" ftype="vcf" />
    </test>
    <!-- Keep only records carrying the TV info field. -->
    <test>
      <param name="input1" value="test.small.vcf" ftype="vcf" />
      <param name="reference_sequence" value="" />
      <param name="region" value="" />
      <param name="keep_quality" value="" />
      <param name="keep_info" value="TV" />
      <param name="discard_info" value="" />
      <param name="pass_filter" value="false" />
      <output name="output1" file="test_extract_keep_info_TV.vcf" ftype="vcf" />
    </test>
    <!-- Discard records carrying the TV info field. -->
    <test>
      <param name="input1" value="test.small.vcf" ftype="vcf" />
      <param name="reference_sequence" value="" />
      <param name="region" value="" />
      <param name="keep_quality" value="" />
      <param name="keep_info" value="" />
      <param name="discard_info" value="TV" />
      <param name="pass_filter" value="false" />
      <output name="output1" file="test_extract_discard_info_TV.vcf" ftype="vcf" />
    </test>
  </tests>
  <outputs>
    <!-- The single VCF dataset produced; the command template refers to it as $output1. -->
    <data name="output1"
          format="vcf"
          label="${tool.name} from ${on_string}"/>
  </outputs>
  <help>

**What it does**

This tool uses vcfPytools_' extract command to extract records from a specified region of a VCF file.

.. _vcfPytools: https://github.com/AlistairNWard/vcfPytools

Option **Extract records from this reference sequence** outputs all records from the specified reference sequence from the input vcf file into the output vcf file.

Option **Extract records from this region** outputs all records from the specified region from the input vcf file into the output vcf file.  The format of the region is ref:start..end, where the start and end coordinates are 1-based.

Option **Keep records containing this quality** allows only records with specified quality values to be retained.  This requires two arguments: the quality value and a logical operator (eq - equals, le - less than or equal to, lt - less than, ge - greater than or equal to, gt - greater than) to determine which records to keep.  For example: **90 ge** will retain all records that have a quality of 90 or greater.

Option **Keep records containing this info field** allows all records to be removed unless they contain this value in the info field.

Option **Discard records containing this info field** ensures that all records containing this value in the info field will not be included in the output file.  To avoid conflicting filters, this option cannot be used together with **Keep records containing this info field**.

Option **Discard records whose filter field is not PASS** will only output records that have the filter field populated with PASS.  All filtered records or records that haven't undergone filtering will be discarded.


  </help>
</tool>