Mercurial > repos > galaxyp > filter_by_fasta_ids
view filter_by_fasta_ids.xml @ 3:3c623e81be77 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/filter_by_fasta_ids commit 0556e0fe5aa17c84033a75a45baeb3a4c2b5ff76
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 16:38:31 -0500 |
parents | 1bd985f14938 |
children | cd22452edec2 |
line wrap: on
line source
<tool id="filter_by_fasta_ids" name="Filter FASTA" version="2.1">
    <!--
        Galaxy wrapper around filter_by_fasta_ids.py.

        Inputs : one FASTA dataset plus optional header criteria (ID list or
                 regular expression) and optional sequence criteria (length
                 bounds or regular expression).
        Outputs: the retained FASTA entries and, when requested, the
                 discarded entries as a second dataset.

        Every free-text value that ends up on the command line is wrapped in
        single quotes there, so each such parameter uses the
        "quote_safe_sanitizer" macro below to keep the quoting intact.
    -->
    <description>on the headers and/or the sequences</description>
    <macros>
        <!--
            Sanitizer for text values that are embedded in single quotes in
            the command template: every printable character is kept, except
            that a single quote is rewritten to '"'"' (close quote, quoted
            single quote, reopen quote) so it cannot terminate the shell
            string.
        -->
        <xml name="quote_safe_sanitizer">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
                <mapping initial="none">
                    <add source="'" target="'&quot;'&quot;'"/>
                </mapping>
            </sanitizer>
        </xml>
        <!-- Non-empty regular expression text field; the label is supplied by the caller. -->
        <xml name="regexp_macro" token_label="Regular expression pattern">
            <param name="regexp" type="text" value="" label="@LABEL@" help="Use the Python regular expression syntax as specified in https://docs.python.org/3/library/re.html">
                <validator type="empty_field"/>
                <expand macro="quote_safe_sanitizer"/>
            </param>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="3.6.5">python</requirement>
    </requirements>
    <command><![CDATA[
        python '$__tool_directory__/filter_by_fasta_ids.py'
        -i '$input'
        #if $header_criteria.header_criteria_select == 'id_list'
            --id_list '$header_criteria.identifiers'
            ## Both id_regex cases (beginning and pattern) define a pattern parameter,
            ## so it is always passed along with the ID list.
            --pattern '$header_criteria.id_regex.pattern'
        #elif $header_criteria.header_criteria_select == 'regexp'
            --header_regexp '$header_criteria.regexp'
        #end if
        #if $sequence_criteria.sequence_criteria_select == 'seq_length'
            --min_length $sequence_criteria.min_length
            ## max_length is optional; an unset value renders as an empty string.
            #if str($sequence_criteria.max_length)
                --max_length $sequence_criteria.max_length
            #end if
        #elif $sequence_criteria.sequence_criteria_select == 'regexp'
            --sequence_regexp '$sequence_criteria.regexp'
        #end if
        $dedup
        -o '$output'
        #if $output_discarded
            -d '$discarded'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fasta" label="FASTA sequences"/>
        <!-- Header filtering: none, by a list of IDs, or by a regular expression. -->
        <conditional name="header_criteria">
            <param name="header_criteria_select" type="select" label="Criteria for filtering on the headers">
                <option value="">No filtering</option>
                <option value="id_list">List of IDs</option>
                <option value="regexp">Regular expression on the headers</option>
            </param>
            <when value=""/>
            <when value="id_list">
                <param name="identifiers" type="data" format="txt" label="List of IDs to extract sequences for"/>
                <!-- How the ID is located in each FASTA header line. -->
                <conditional name="id_regex">
                    <param name="find" type="select" label="Match IDs by">
                        <option value="beginning">Default: ID is expected at the beginning: &gt;ID </option>
                        <help>Default: &gt;ID will use search pattern &gt;([^| ]+) to input ID; Use custom regex to change</help>
                        <option value="pattern">Custom regex pattern</option>
                    </param>
                    <when value="beginning">
                        <!-- Fixed default pattern; hidden from the form but still sanitized because it is placed on the command line. -->
                        <param name="pattern" type="hidden" value="&gt;([^| ]+)" label="regex search pattern for ID">
                            <expand macro="quote_safe_sanitizer"/>
                            <validator type="regex" message="must include a group that returns an ID">^.*[(](?![?]:).*[)].*$</validator>
                        </param>
                    </when>
                    <when value="pattern">
                        <param name="pattern" type="text" value="" label="regex search pattern for ID">
                            <help>Search pattern must include a capturing group, i.e. ( ), that returns the ID. Use this for Uniprot Acc: &gt;.+?\|(.+?)\|.*$ </help>
                            <expand macro="quote_safe_sanitizer"/>
                            <!-- Requires at least one parenthesised group that is not a non-capturing (?: ) group. -->
                            <validator type="regex" message="must include a group that returns an ID">^.*[(](?![?]:).*[)].*$</validator>
                        </param>
                    </when>
                </conditional>
            </when>
            <when value="regexp">
                <expand macro="regexp_macro" label="Regular expression pattern the header should match"/>
            </when>
        </conditional>
        <!-- Sequence filtering: none, by length bounds, or by a regular expression. -->
        <conditional name="sequence_criteria">
            <param name="sequence_criteria_select" type="select" label="Criteria for filtering on the sequences">
                <option value="">No filtering</option>
                <option value="seq_length">Sequence length</option>
                <option value="regexp">Regular expression on the sequences</option>
            </param>
            <when value=""/>
            <when value="seq_length">
                <param name="min_length" type="integer" value="0" label="Minimum length"/>
                <param name="max_length" type="integer" min="1" value="" optional="true" label="Maximum length"/>
            </when>
            <when value="regexp">
                <expand macro="regexp_macro" label="Regular expression pattern the sequence should match"/>
            </when>
        </conditional>
        <param name="dedup" type="boolean" truevalue="--dedup" falsevalue="" label="Remove duplicate sequences"/>
        <param name="output_discarded" type="boolean" label="Output discarded FASTA entries"/>
    </inputs>
    <outputs>
        <data name="output" format="fasta" label="${tool.name} on ${on_string}: FASTA sequences"/>
        <!-- Only created when the user asks for the discarded entries. -->
        <data name="discarded" format="fasta" label="${tool.name} on ${on_string}: discarded entries">
            <filter>output_discarded</filter>
        </data>
    </outputs>
    <tests>
        <!-- ID list with the default header pattern, duplicates removed. -->
        <test expect_num_outputs="1">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="header_criteria_select" value="id_list"/>
            <param name="identifiers" ftype="txt" value="ids.txt"/>
            <param name="dedup" value="True"/>
            <output name="output" file="output_dedup.fasta"/>
        </test>
        <!-- ID list with a custom header pattern, duplicates removed. -->
        <test expect_num_outputs="1">
            <param name="input" ftype="fasta" value="input_sp.fasta"/>
            <param name="header_criteria_select" value="id_list"/>
            <param name="find" value="pattern"/>
            <param name="pattern" value="&gt;.+?\|(.+?)\|.*$"/>
            <param name="identifiers" ftype="txt" value="ids_sp.txt"/>
            <param name="dedup" value="True"/>
            <output name="output" file="output_sp_dedup.fasta"/>
        </test>
        <!-- ID list without deduplication, discarded entries requested. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="header_criteria_select" value="id_list"/>
            <param name="identifiers" ftype="txt" value="ids.txt"/>
            <param name="dedup" value="False"/>
            <param name="output_discarded" value="True"/>
            <output name="output" file="output_not_dedup.fasta"/>
            <output name="discarded" file="discarded_not_dedup.fasta"/>
        </test>
        <!-- Regular expression on the headers. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="header_criteria_select" value="regexp"/>
            <param name="regexp" value="2"/>
            <param name="dedup" value="False"/>
            <param name="output_discarded" value="True"/>
            <output name="output" file="output_header_regexp.fasta"/>
            <output name="discarded" file="discarded_header_regexp.fasta"/>
        </test>
        <!-- Minimum sequence length only. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="sequence_criteria_select" value="seq_length"/>
            <param name="min_length" value="5"/>
            <param name="dedup" value="False"/>
            <param name="output_discarded" value="True"/>
            <output name="output" file="output_min_length5.fasta"/>
            <output name="discarded" file="discarded_min_length5.fasta"/>
        </test>
        <!-- Maximum sequence length only (minimum left at its default). -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="sequence_criteria_select" value="seq_length"/>
            <param name="max_length" value="4"/>
            <param name="dedup" value="False"/>
            <param name="output_discarded" value="True"/>
            <output name="output" file="output_max_length4.fasta"/>
            <output name="discarded" file="discarded_max_length4.fasta"/>
        </test>
        <!-- Regular expression on the sequences. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="input.fasta"/>
            <param name="sequence_criteria_select" value="regexp"/>
            <param name="regexp" value="T{2,}"/>
            <param name="dedup" value="False"/>
            <param name="output_discarded" value="True"/>
            <output name="output" file="output_sequence_regexp.fasta"/>
            <output name="discarded" file="discarded_sequence_regexp.fasta"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Filter entries of a FASTA file on the headers and/or the sequences based on various criteria.
    ]]></help>
</tool>