view tools/maf/maf_filter.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="MAF_filter" name="Filter MAF" version="1.0.1">
  <description>by specified attributes</description>
  <!-- Runs maf_filter.py with positional arguments, in this order: the rendered
       maf_filter_file configfile, the input MAF, the output MAF, the output's
       files_path, the selected species, min_size, max_size, min_species_per_block,
       exclude_incomplete_blocks, and the species list from the input's metadata. -->
  <command interpreter="python">maf_filter.py $maf_filter_file $input1 $out_file1 $out_file1.files_path $species $min_size $max_size $min_species_per_block $exclude_incomplete_blocks ${input1.metadata.species}</command>
  <inputs>
    <page>
      <!-- Alignment to filter; only MAF datasets are accepted. -->
      <param name="input1" type="data" format="maf" label="MAF File"/>
      <!-- Integer size bounds handed to the script; both default to 0. -->
      <param name="min_size" type="integer" value="0" label="Minimum Size"/>
      <param name="max_size" type="integer" value="0" label="Maximum Size"
             help="A maximum size less than 1 indicates no limit"/>
      <!-- Multi-select of species to keep; the choices are filled in from the
           species metadata of the dataset chosen for input1. -->
      <param name="species"
             type="select"
             multiple="true"
             display="checkboxes"
             label="Choose species"
             help="Select species to be included in the final alignment">
        <options>
          <filter type="data_meta" ref="input1" key="species"/>
        </options>
      </param>
      <!-- Submitted to the script as min_species_per_block: "Yes" sends 2,
           "No" (the default) sends 1. -->
      <param name="min_species_per_block" type="select" label="Exclude blocks which have only one species">
        <option value="2">Yes</option>
        <option value="1" selected="true">No</option>
      </param>
      <!-- Submitted to the script as exclude_incomplete_blocks: "Yes" sends 1,
           "No" (the default) sends 0. -->
      <param name="exclude_incomplete_blocks" type="select" label="Exclude blocks which have missing species">
        <option value="1">Yes</option>
        <option value="0" selected="true">No</option>
      </param>
      <!-- Each "Filter" entry pairs a primary species test (species1) with zero or
           more conditions on other species (filter_condition). The maf_filter_file
           template iterates over this repeat and skips entries without conditions. -->
      <repeat name="maf_filters" title="Filter">
        <param name="species1" type="select" label="When Species" multiple="false">
          <options>
            <filter type="data_meta" ref="input1" key="species" />
          </options>
        </param>
        <!-- The primary test is either on strand (operators == and !=, value + or -)
             or on chromosome (operators in and not in, free-text value). -->
        <conditional name="species1_attributes">
          <param name="species1_attribute_type" type="select" label="Species Attribute">
            <option value="attribute_strand">Strand</option>
            <option value="attribute_chr" selected="true">Chromosome</option>
          </param>
          <when value="attribute_strand">
            <param name="species1_is_isnot" type="select" label="Conditional">
              <option value="==">Is</option>
              <option value="!=">Is Not</option>
            </param>
            <param name="species1_attribute" type="select" label="Strand">
              <option value="+" selected="true">+</option>
              <option value="-">-</option>
            </param>
            <!-- Conditions on other species, evaluated by the template only when the
                 primary strand test holds; the attribute type defaults to Strand here. -->
            <repeat name="filter_condition" title="Filter Condition">
              <param name="species2" type="select" label="Species" multiple="false">
                <options>
                  <filter type="data_meta" ref="input1" key="species" />
                </options>
              </param>
              <conditional name="species2_attributes">
                <param name="species2_attribute_type" type="select" label="Species Attribute">
                  <option value="attribute_strand" selected="true">Strand</option>
                  <option value="attribute_chr">Chromosome</option>
                </param>
                <when value="attribute_strand">
                  <param name="species2_is_isnot" type="select" label="Conditional">
                    <option value="==">Is</option>
                    <option value="!=">Is Not</option>
                  </param>
                  <param name="species2_attribute" type="select" label="Strand">
                    <option value="+" selected="true">+</option>
                    <option value="-">-</option>
                  </param>
                </when>
                <when value="attribute_chr">
                  <param name="species2_is_isnot" type="select" label="Conditional">
                    <option value="in">Is</option>
                    <option value="not in">Is Not</option>
                  </param>
                  <param name="species2_attribute" type="text" label="Chromosome" value="chr1"/>
                </when>
              </conditional>
            </repeat>
          </when>
          <when value="attribute_chr">
            <param name="species1_is_isnot" type="select" label="Conditional">
              <option value="in">Is</option>
              <option value="not in">Is Not</option>
            </param>
            <!-- The template splits this text on commas, so several chromosomes
                 may be listed. -->
            <param name="species1_attribute" type="text" label="Chromosome" value="chr1"/>
            <!-- Same condition structure as in the strand branch, except that the
                 attribute type defaults to Chromosome here. -->
            <repeat name="filter_condition" title="Filter Condition">
              <param name="species2" type="select" label="Species" multiple="false">
                <options>
                  <filter type="data_meta" ref="input1" key="species" />
                </options>
              </param>
              <conditional name="species2_attributes">
                <param name="species2_attribute_type" type="select" label="Species Attribute">
                  <option value="attribute_strand">Strand</option>
                  <option value="attribute_chr" selected="true">Chromosome</option>
                </param>
                <when value="attribute_strand">
                  <param name="species2_is_isnot" type="select" label="Conditional">
                    <option value="==">Is</option>
                    <option value="!=">Is Not</option>
                  </param>
                  <param name="species2_attribute" type="select" label="Strand">
                    <option value="+" selected="true">+</option>
                    <option value="-">-</option>
                  </param>
                </when>
                <when value="attribute_chr">
                  <param name="species2_is_isnot" type="select" label="Conditional">
                    <option value="in">Is</option>
                    <option value="not in">Is Not</option>
                  </param>
                  <param name="species2_attribute" type="text" label="Chromosome" value="chr1"/>
                </when>
              </conditional>
            </repeat>
          </when>
        </conditional>
      </repeat>
    </page>
  </inputs>
  <configfiles>
    <!-- Cheetah template rendered into the Python filter script that is passed to
         maf_filter.py as its first argument. The script defines
         maf_block_pass_filter(), which returns False as soon as one filter
         condition is violated and True otherwise, then stores the verdict for the
         current block in ret_val. Species that are absent from a block produce a
         None component and their tests are skipped. -->
    <configfile name="maf_filter_file">
## Whitelist of comparison operators that may be written into the generated code.
## Any other submitted operator falls back to a valid default ("in" for chromosome
## tests, "==" for strand tests) so the rendered script always parses.
#set $is_isnot_valid = {"==":"==", "!=":"!=", "in":"in", "not in":"not in"}
def maf_block_pass_filter( maf_block ):
#for $maf_filter in $maf_filters:
## A filter without conditions would render an "if" with an empty body; skip it.
#if $len( $maf_filter['species1_attributes']['filter_condition'] ) == 0:
#continue
#end if
    primary_component = maf_block.get_component_by_src_start( """$maf_filter['species1'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
    if primary_component is not None:
## Chromosome tests compare the last dot-separated part of src against a comma-split list.
#if $maf_filter['species1_attributes']['species1_attribute_type'] == 'attribute_chr':
        if primary_component.src.split( "." )[-1] $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), 'in' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ):
#else
        if primary_component.strand $is_isnot_valid.get( $maf_filter['species1_attributes']['species1_is_isnot'].value.strip(), '==' ) """$maf_filter['species1_attributes']['species1_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ):
#end if
## Every condition below is only evaluated when the primary test above holds.
#for $filter_condition in $maf_filter['species1_attributes']['filter_condition']:
            secondary_component = maf_block.get_component_by_src_start( """$filter_condition['species2'].value.encode( 'string_escape' )""".decode( 'string_escape' ) )
#if $filter_condition['species2_attributes']['species2_attribute_type'] == 'attribute_chr':
            if secondary_component is not None:
                if not ( secondary_component.src.split( "." )[-1] $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), 'in' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ).split( "," ) ):
                    return False
#else:
            if secondary_component is not None:
                if not ( secondary_component.strand $is_isnot_valid.get( $filter_condition['species2_attributes']['species2_is_isnot'].value.strip(), '==' ) """$filter_condition['species2_attributes']['species2_attribute'].value.encode( 'string_escape' )""".decode( 'string_escape' ) ):
                    return False
#end if
#end for
#end for
    return True
ret_val = maf_block_pass_filter( maf_block )
</configfile>
  </configfiles>
  <outputs>
    <!-- The filtered alignment, written as a MAF dataset. -->
    <data name="out_file1" format="maf"/>
  </outputs>
<!--
  <tests>
    <test>
      <param name="input1" value="4.maf"/>
      <param name="species" value="bosTau2,canFam2,hg17,panTro1,rheMac2,rn3"/>
      <param name="exclude_incomplete_blocks" value="0"/>
      <param name="min_species_per_block" value="1"/>
      <param name="min_size" value="0"/>
      <param name="max_size" value="0"/>
      <param name="species1" value="hg17"/>
      <param name="species2" value="hg17"/>
      <param name="species1_attribute_type" value="attribute_chr"/>
      <param name="species1_is_isnot" value="in"/>
      <param name="species1_attribute" value="chr1"/>
      <param name="filter_condition"/> Test will ERROR when this is set or when it is not set.
      
      <output name="out_file1" file="cf_maf_limit_to_species.dat"/>
    </test>
  </tests>
-->
<help>
This tool allows you to build complex filters to be applied to each alignment block of a MAF file. You can define constraints on species based upon chromosome and strand. You can specify comma-separated lists of chromosomes where appropriate.

.. class:: infomark

For example, this tool is useful to restrict a set of alignments to only those blocks which contain alignments between chromosomes that are considered homologous.

-----

.. class:: warningmark

If a species is not found in a particular block, all filters on that species are ignored.

-----

This tool allows the user to remove any undesired species from a MAF file. If no species are specified then all species will be kept. If species are specified, columns which contain only gaps are removed. The options for this are:

 * **Exclude blocks which have missing species** - suppose you want to restrict an 8-way alignment to human, mouse, and rat. The tool will first remove all other species. Next, if this option is set to **YES**, the tool WILL NOT return MAF blocks that are missing any of human, mouse, or rat. This means that all alignment blocks returned by the tool will have exactly three sequences in this example.

 * **Exclude blocks which have only one species** - if this option is set to **YES** all single sequence alignment blocks WILL NOT be returned.

-----

You can also provide a size range and limit your output to the MAF blocks which fall within the specified range.

------

**Citation**

If you use this tool, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. &lt;http://www.ncbi.nlm.nih.gov/pubmed/21775304&gt;`_


</help>
</tool>