Mercurial > repos > iuc > proteinortho_grab_proteins
view proteinortho_grab_proteins.xml @ 9:4ab23324f633 draft default tip
planemo upload for repository https://gitlab.com/paulklemm_PHD/proteinortho commit 06b043fd9534a475d8aa27b3295dd84d0b980b4f
author | iuc |
---|---|
date | Wed, 06 Mar 2024 20:44:56 +0000 |
parents | b183a90fe278 |
children |
line wrap: on
line source
<tool id="proteinortho_grab_proteins" name="Proteinortho grab proteins" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" profile="@PROFILE@">
    <description>finds genes/proteins in a given fasta file</description>
    <!-- The version tokens and the biotools/requirements/version_command/citations
         macros expanded below are expected to come from the imported macro file. -->
    <macros>
        <import>proteinortho_macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- Cheetah template: every directive must stay on its own line, because a
         "##" comment swallows everything up to the end of the line. -->
    <command detect_errors="exit_code"><![CDATA[
        ## the following ln-action is necessary, since the file names are used by proteinortho (output contains filenames => species names)
        #import re
        ## link every input under its element identifier, with each character
        ## outside [A-Za-z0-9_.-] replaced by an underscore
        #for $f in $input_files#
            ln -sf '$f' '${re.sub('[^\w\-_.]', '_', f.element_identifier)}' &&
        #end for#
        ## a query file is linked under the fixed name 'query'
        #if $query.querytype == "file":
            ln -sf '$query.queryfile' 'query' &&
        #end if
        ## run inside 'output' so the produced files land in the directory
        ## scanned by discover_datasets
        mkdir output &&
        cd output &&
        proteinortho_grab_proteins.pl
            --tofiles
            #if $regex:
                '$regex'
            #end if
            $source
            #if $query.querytype == "string":
                '$query.querystring'
            #else:
                ../query
            #end if
            ## the links created above live one level up from 'output'
            #for $f in $input_files#
                ../${re.sub('[^\w\-_.]', '_', f.element_identifier)}
            #end for#
    ]]></command>
    <inputs>
        <param name="input_files" type="data" format="fasta" multiple="true" min="1" label="Select the input fasta files"/>
        <!-- The query is either a literal identifier/pattern or an orthology-groups table. -->
        <conditional name="query">
            <param name="querytype" type="select" label="Query type">
                <option value="string" selected="true">String</option>
                <option value="file">orthology-groups output file</option>
            </param>
            <when value="string">
                <param name="querystring" type="text" label="A string of the protein/gene name/identifier that you want to search">
                    <validator type="regex" negate="true" message="Identifier must not end with a backslash">.*\\$</validator>
                    <!-- Whitelist sanitizer: any character not listed here is dropped
                         (invalid_char=""). The single quote is deliberately absent,
                         because the value is placed inside single quotes on the
                         command line. -->
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="!"/>
                            <add value="="/>
                            <add value="-"/>
                            <add value="."/>
                            <add value="*"/>
                            <add value="?"/>
                            <add value="+"/>
                            <add value="\\"/>
                            <add value="_"/>
                            <add value="|"/>
                            <!-- No entry for the two-character sequence \t is needed:
                                 values are added character by character, and both the
                                 backslash and the letter t are already allowed. -->
                            <!-- A literal tab inside an attribute value is normalized to
                                 a space by the XML parser, so the tab is written as a
                                 character reference. The space is listed explicitly to
                                 keep accepting what the literal form accepted. -->
                            <add value=" "/> <!-- space -->
                            <add value="&#9;"/> <!-- tab -->
                            <add value=","/>
                            <add value=";"/>
                            <add value="["/> <!-- left square bracket, e.g subselecting from vec[1] -->
                            <add value="]"/> <!-- right square bracket -->
                            <add value="("/> <!-- left parenthesis -->
                            <add value=")"/> <!-- right parenthesis -->
                        </valid>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="queryfile" type="data" format="tabular" label="An orthology-groups file" help="For each group a fasta file is generated containing all proteins/genes of that group."/>
            </when>
        </conditional>
        <!-- Both booleans expand to the flag given in truevalue, or to nothing. -->
        <param argument="--regex" type="boolean" checked="false" truevalue="-E" falsevalue="" label="Enable regular expressions (perl)" help="If not: the string is escaped (e.g. | -> \|) [-E]"/>
        <param argument="--source" type="boolean" checked="false" truevalue="-source" falsevalue="" label="Add the filename to the gene/protein-name [--source]"/>
    </inputs>
    <outputs>
        <!-- Every file found in the 'output' directory becomes one fasta element of the list. -->
        <collection name="listproteinorthograbproteins" type="list" label="${tool.name} on ${on_string}: list of fasta">
            <discover_datasets pattern="__designation__" format="fasta" directory="output" visible="false"/>
        </collection>
    </outputs>
    <tests>
        <!-- plain string query -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="string"/>
                <param name="querystring" value="E_1"/>
            </conditional>
            <output_collection name="listproteinorthograbproteins" count="1"/>
        </test>
        <!-- regular-expression query with the source file name added -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="string"/>
                <param name="querystring" value="M..2"/>
            </conditional>
            <param name="regex" value="true"/>
            <param name="source" value="true"/>
            <output_collection name="listproteinorthograbproteins" count="1"/>
        </test>
        <!-- orthology-groups file as query -->
        <test>
            <param name="input_files" value="L.fasta,C.fasta,C2.fasta,E.fasta,M.fasta"/>
            <conditional name="query">
                <param name="querytype" value="file"/>
                <param name="queryfile" value="result.proteinortho.tsv"/>
            </conditional>
            <output_collection name="listproteinorthograbproteins" count="34"/>
        </test>
    </tests>
    <help><![CDATA[proteinortho grab proteins

**What it does**

proteinortho_grab_proteins : find gene(s)/protein(s) in a given fasta file and retrieve their sequence(s).
You can also use an orthology-groups file; then all sequences of each group (one line of the file) are written to a separate file. This can result in many files!

**Other Proteinortho-Tools for downstream analysis**

* `proteinortho summary` : Summarizes the orthology-pairs/RBH files to determine how the species are connected to each other.

More information can be found on GitLab: https://gitlab.com/paulklemm_PHD/proteinortho
    ]]></help>
    <expand macro="citations"/>
</tool>