Mercurial > repos > iuc > rrmscorer
changeset 0:02b0e0f78d8a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/rrmscorer commit 1ed15985d486840a247b2803613c584c074e4744
| author | iuc |
|---|---|
| date | Wed, 17 Sep 2025 14:53:30 +0000 |
| parents | |
| children | |
| files | RRMScorer.xml macros.xml test-data/input.fasta |
| diffstat | 3 files changed, 533 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RRMScorer.xml Wed Sep 17 14:53:30 2025 +0000
@@ -0,0 +1,515 @@
+<tool id="rrmscorer" name="RRM-Scorer" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Predicts RNA Recognition Motif (RRM) scores</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">RRMScorer</xref>
+    </xrefs>
+    <expand macro="requirements"/> <!-- the "requirements" macro is defined in macros.xml -->
+    <command detect_errors="exit_code"><![CDATA[
+        mkdir -p json tabular aligned plots &&
+        rrmscorer
+        --json 'json'
+        --csv 'tabular'
+        --window_size $tool_parameters.window_size
+        #if str($input_sequence.input_type_cond.input_type) == 'fasta'
+            --fasta '$input_sequence.input_type_cond.fasta_input'
+        #elif str($input_sequence.input_type_cond.input_type) == 'uniprot'
+            --uniprot '$input_sequence.input_type_cond.uniprot_id'
+        #end if
+
+        #if $output_options.generate_plots
+            --plot 'plots'
+        #end if
+
+        #if $output_options.generate_fasta
+            --aligned 'aligned'
+        #end if
+
+        #if $tool_parameters.target and not $output_options.top_scoring_rna
+            --rna '$tool_parameters.target'
+        #else
+            --top
+        #end if
+    ]]></command> <!-- --rna is passed only when a target is set and top_scoring_rna is off; every other combination falls back to --top -->
+    <inputs>
+        <section name="input_sequence" title="Input sequence" expanded="true">
+            <conditional name="input_type_cond">
+                <param name="input_type" type="select" label="Select the type of sequence input" optional="false">
+                    <option value="fasta" selected="true">FASTA file containing the protein sequence(s)</option>
+                    <option value="uniprot">Protein identifier from UniProt</option>
+                </param>
+                <when value="fasta">
+                    <param name="fasta_input" type="data" format="fasta" optional="false" multiple="false" label="Protein sequence(s) in FASTA format" help="Provide a FASTA file containing the protein sequences.">
+                        <validator type="dataset_ok_validator"/>
+                    </param>
+                </when>
+                <when value="uniprot">
+                    <param name="uniprot_id" type="text" value="" optional="false" label="Protein identifier from UniProt" help="Provide a UniProt ID (e.g. P19339).">
+                        <validator type="empty_field" message="Missing UniProt ID"/>
+                    </param>
+                </when>
+            </conditional>
+        </section>
+        <section name="tool_parameters" title="Tool parameters" help="Configure this section to select the predictions to be executed">
+            <param name="target" type="text" optional="true" label="Target RNA sequence (min. 5 nucleotides)" help="Provide a valid target RNA sequence. Leave blank if not specifying a target RNA so that the predictor will use the top-scoring RNA by default.">
+                <validator type="regex" message="The sequence must consist only of RNA nucleotides (A, U, G, C) or be left blank.">^([AUGCaugc]+)?$</validator>
+            </param>
+            <param name="window_size" type="select" label="The window size to test">
+                <option value="5" selected="true">5</option>
+                <option value="3">3</option>
+            </param>
+        </section>
+        <section name="output_options" title="Output parameters" help="Configure this section to define the tool output files">
+            <param name="generate_plots" type="boolean" label="Generate score plots for all the RNA possible windows" help="Enable to generate score plots."/>
+            <param name="top_scoring_rna" type="boolean" label="Find the top-scoring RNA for the specified RRM(s)" help="Enable to find and plot the top-scoring RNA. Attention: This option overrides the target RNA if present."/>
+            <param name="generate_fasta" type="boolean" label="Generate a FASTA file for each input sequence aligned to the HMM" help="Enable to generate a FASTA file with aligned sequences."/>
+        </section>
+    </inputs>
+    <outputs> <!-- each collection is discovered from one of the directories created by "mkdir -p" in the command -->
+        <collection name="split_csv" type="list" label="Tabular predictions by sequence">
+            <discover_datasets pattern="__designation_and_ext__" directory="tabular" visible="true"/>
+        </collection>
+        <collection name="split_json" type="list" label="Json predictions by sequence">
+            <discover_datasets pattern="__designation_and_ext__" directory="json" visible="true"/>
+        </collection>
+        <collection name="split_aligned" type="list" label="Alignment in FASTA format by sequence">
+            <discover_datasets pattern="__designation_and_ext__" directory="aligned" visible="true"/>
+        </collection>
+        <collection name="split_plots" type="list" label="Plots by sequence">
+            <discover_datasets pattern="__designation_and_ext__" directory="plots" visible="true"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Test 1: UniProt ID P19339, Window Size 5, RNA AUGGCU -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="0"/>
+            <output_collection name="split_plots" count="0"/>
+        </test>
+        <!-- Test 2: UniProt ID P19339, Window Size 3, RNA AUGGCU -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="3"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 3"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="0"/>
+            <output_collection name="split_plots" count="0"/>
+        </test>
+        <!-- Test 3: UniProt ID P19339, Window Size 5, RNA AUGGCU, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="generate_fasta" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--aligned 'aligned'"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="2"/>
+            <output_collection name="split_plots" count="0"/>
+        </test>
+        <!-- Test 4: UniProt ID P19339, Window Size 5, Top RNA, with Plots -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="true"/>
+                <param name="generate_plots" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="0"/>
+            <output_collection name="split_aligned" count="0"/>
+            <output_collection name="split_plots" count="8"/>
+        </test>
+        <!-- Test 5: UniProt ID P19339, Window Size 5, Top RNA, with Plots, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="true"/>
+                <param name="generate_plots" value="true"/>
+                <param name="generate_fasta" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--plot 'plots'"/>
+                <has_text text="--aligned 'aligned'"/>
+                <has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_json" type="list" count="2"/>
+            <output_collection name="split_aligned" type="list" count="2"/>
+            <output_collection name="split_plots" type="list" count="8"/>
+        </test>
+        <!-- Test 6: UniProt ID P19339, Window Size 5, RNA AUGGCU, with Plots -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="generate_plots" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="0"/>
+            <output_collection name="split_plots" count="2"/>
+        </test>
+        <!-- Test 7: UniProt ID P19339, Window Size 5, RNA AUGGCU, with Plots, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P19339"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="false"/>
+                <param name="generate_fasta" value="true"/>
+                <param name="generate_plots" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P19339'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--plot 'plots'"/>
+                <not_has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="2"/>
+            <output_collection name="split_plots" count="2"/>
+        </test>
+        <!-- Test 8: Fasta file, Window Size 5, RNA AUGGCU, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="fasta"/>
+                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="generate_fasta" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--fasta"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+            </assert_command>
+            <output_collection name="split_json" count="2"/>
+            <output_collection name="split_csv" count="2"/>
+            <output_collection name="split_aligned" count="2"/>
+            <output_collection name="split_plots" type="list" count="0"/>
+        </test>
+        <!-- Test 9: Fasta file, Window Size 5, Top RNA, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="fasta"/>
+                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="true"/>
+                <param name="generate_fasta" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--fasta"/>
+                <not_has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--aligned 'aligned'"/>
+                <has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_csv" type="list" count="0"/>
+            <output_collection name="split_json" type="list" count="2"/>
+            <output_collection name="split_plots" type="list" count="0"/>
+            <output_collection name="split_aligned" count="2"/>
+        </test>
+        <!-- Test 10: Fasta file, Window Size 5, Top RNA, with Plots, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="fasta"/>
+                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="true"/>
+                <param name="generate_fasta" value="true"/>
+                <param name="generate_plots" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--fasta"/>
+                <not_has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--plot 'plots'"/>
+                <has_text text="--aligned 'aligned'"/>
+                <has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_csv" type="list" count="0"/>
+            <output_collection name="split_json" type="list" count="2"/>
+            <output_collection name="split_aligned" type="list" count="2"/>
+            <output_collection name="split_plots" type="list" count="8"/>
+        </test>
+        <!-- Test 11: Fasta file, Window Size 5, RNA AUGGCU, Top RNA explicitly disabled, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="fasta"/>
+                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+                <param name="target" value="AUGGCU"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="false"/>
+                <param name="generate_fasta" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--fasta"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--aligned 'aligned'"/>
+                <not_has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_csv" type="list" count="2"/>
+            <output_collection name="split_json" type="list" count="2"/>
+            <output_collection name="split_plots" type="list" count="0"/>
+            <output_collection name="split_aligned" count="2"/>
+        </test>
+        <!-- Test 12: Fasta file, Window Size 5, RNA AUGGCU, with Plots, Aligned FASTA files -->
+        <test expect_num_outputs="4">
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="fasta"/>
+                    <param name="fasta_input" value="input.fasta" ftype="fasta"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="window_size" value="5"/>
+                <param name="target" value="AUGGCU"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="false"/>
+                <param name="generate_fasta" value="true"/>
+                <param name="generate_plots" value="true"/>
+            </section>
+            <assert_command>
+                <has_text text="--fasta"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+                <has_text text="--plot 'plots'"/>
+                <has_text text="--aligned 'aligned'"/>
+                <not_has_text text="--top"/>
+            </assert_command>
+            <output_collection name="split_csv" type="list" count="2"/>
+            <output_collection name="split_json" type="list" count="2"/>
+            <output_collection name="split_plots" type="list" count="2"/>
+            <output_collection name="split_aligned" count="2"/>
+        </test>
+        <!-- Test 13: Non-RRM-RNA protein (P05067) -->
+        <test>
+            <section name="input_sequence">
+                <conditional name="input_type_cond">
+                    <param name="input_type" value="uniprot"/>
+                    <param name="uniprot_id" value="P05067"/>
+                </conditional>
+            </section>
+            <section name="tool_parameters">
+                <param name="target" value="AUGGCU"/>
+                <param name="window_size" value="5"/>
+            </section>
+            <section name="output_options">
+                <param name="top_scoring_rna" value="false"/>
+                <param name="generate_plots" value="false"/>
+                <param name="generate_fasta" value="false"/>
+            </section>
+            <assert_command>
+                <has_text text="--uniprot 'P05067'"/>
+                <has_text text="--rna 'AUGGCU'"/>
+                <has_text text="--window_size 5"/>
+                <has_text text="--json 'json'"/>
+                <has_text text="--csv 'tabular'"/>
+            </assert_command>
+            <output_collection name="split_csv" type="list" count="0"/>
+            <output_collection name="split_json" type="list" count="0"/>
+            <output_collection name="split_plots" type="list" count="0"/>
+            <output_collection name="split_aligned" type="list" count="0"/>
+        </test>
+    </tests>
+    <creator>
+        <organization name="Bio2Byte, Vrije Universiteit Brussel (VUB)" address="Interuniversity Institute Bioinformatics Brussels, Université Libre de Bruxelles, 1050 Ixelles, Brussels, Belgium" url="https://bio2byte.be/rrmscorer" email="bio2byte@vub.be" image="https://0.gravatar.com/avatar/2b51fb7600d876086669bcc85a941b763a81d1c2bb3c667b8c83a1aa892cf740"/>
+    </creator>
+    <help><![CDATA[
+        This tool allows you to predict RNA Recognition Motif (RRM) scores for protein
+        sequences provided in *FASTA* format or as *UniProt IDs*.
+
+        **RRMScorer** is designed to predict RNA binding preferences for proteins containing
+        RNA recognition motifs (RRMs), the most prevalent RNA binding domain in eukaryotes.
+
+        **Abstract:**
+
+        By carefully analysing a dataset of 187 RRM-RNA structural complexes, we calculated
+        residue-level binding scores using a probabilistic model derived from
+        amino acid-nucleotide interaction propensities, which are the basis of
+        **RRMScorer**.
+
+        With its ability to provide residue-level insights and accurate predictions,
+        **RRMScorer** serves as a valuable tool for researchers exploring the functional
+        landscape of RRM-RNA interactions.
+
+        **Methodology:**
+
+        The input sequence is scanned against our RRMScorer hidden Markov model (HMM)
+        to (i) identify whether the input sequence contains any RRM domain and (ii)
+        map to the 20 positions in the RRM protein sequence alignment that we use to compute
+        the RNA binding scores.
+
+        If one or more RRM domains are identified in the input sequence, RRMScorer computes
+        the score of the user-defined RNA sequence, or if absent, the scores for all the
+        1024 possible RNA sequences with a length of 5 nt. By utilizing a specific
+        RNA sequence, the user can inspect to which 5-nt windows the RRM is more likely
+        to bind.
+
+        **Input fields:**
+
+        * **Protein sequence(s) in FASTA format:** Provide a *FASTA* file containing the protein sequences.
+        * **Protein identifier from UniProt:** Provide a *UniProt ID* (e.g. P19339).
+        * **Target RNA sequence:** Provide a valid target RNA sequence (A, U, G, C only). Leave blank to use the top-scoring RNA instead.
+        * **The window size to test:** Select the window size (either 3 or 5 nucleotides).
+        * **Find the top-scoring RNA:** Enable to find the top-scoring RNA; this overrides the target RNA if one is given.
+        * **Generate score plots / Generate a FASTA file:** Enable to additionally produce score plots and/or per-sequence FASTA files aligned to the HMM.
+
+        **Output:**
+
+        The results are provided in tabular (CSV) and JSON formats, plus bar plots when
+        score plots are enabled. When a custom RNA is not provided, the enabled plots
+        include protein sequence logos for a range of top-scoring RNA sequences, and only
+        the JSON files with the scores are produced (no tabular output).
+
+        **Funding:**
+        This project has received funding from the European Union's Horizon 2020 research
+        and innovation programme under the Marie Skłodowska-Curie grant
+        agreement No. 813239. This work was supported by the European Regional
+        Development Fund and Brussels-Capital Region-Innoviris within the framework of the
+        Operational Programme 2014-2020 (ERDF-2020 project ICITY-RDI.BRU).
+        ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1010859</citation>
+        <citation type="doi">10.1093/nar/gkaf367</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Sep 17 14:53:30 2025 +0000
@@ -0,0 +1,11 @@
+<macros>
+    <token name="@TOOL_VERSION@">1.0.11</token> <!-- rrmscorer package version; also the leading part of the tool's version attribute -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- appended after "+galaxy" in the tool's version attribute -->
+    <token name="@PROFILE@">22.05</token> <!-- substituted into the tool's profile attribute -->
+    <xml name="requirements"> <!-- pulled into RRMScorer.xml via <expand macro="requirements"/> -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">rrmscorer</requirement>
+            <requirement type="package" version="0.11.1">seaborn-base</requirement> <!-- NOTE(review): presumably needed for the plot outputs; confirm -->
+        </requirements>
+    </xml>
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.fasta Wed Sep 17 14:53:30 2025 +0000 @@ -0,0 +1,7 @@ +>sp|P19339|SXL_DROME Protein sex-lethal OS=Drosophila melanogaster OX=7227 GN=Sxl PE=1 SV=1 +MYGNNNPGSNNNNGGYPPYGYNNKSSGGRGFGMSHSLPSGMSRYAFSPQDTEFSFPSSSS +RRGYNDFPGCGGSGGNGGSANNLGGGNMCHLPPMASNNSLNNLCGLSLGSGGSDDLMNDP +RASNTNLIVNYLPQDMTDRELYALFRAIGPINTCRIMRDYKTGYSFGYAFVDFTSEMDSQ +RAIKVLNGITVRNKRLKVSYARPGGESIKDTNLYVTNLPRTITDDQLDTIFGKYGSIVQK +NILRDKLTGRPRGVAFVRYNKREEAQEAISALNNVIPEGGSQPLSVRLAEEHGKAKAAHF +MSQMGVVPANVPPPPPQPPAHMAAAFNMMHRGRSIKSQQRFQNSHPYFDAKKFI \ No newline at end of file
