comparison m6anet.xml @ 0:40f186d91e67 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/m6anet commit cfa942e434b3c39e70c06cf4968e5472f5a1ce92
author iuc
date Wed, 25 Oct 2023 07:12:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:40f186d91e67
1 <tool id="m6anet" name="m6anet" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>to detect m6A RNA modifications from nanopore data</description>
3 <macros> <!-- Tokens substituted into the tool element's version and profile attributes and into the package requirement. -->
4 <token name="@TOOL_VERSION@">2.1.0</token> <!-- First part of the tool version; also the version of the m6anet package requirement. -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- Appended to the tool version after "+galaxy". -->
6 <token name="@PROFILE@">23.0</token> <!-- Value of the profile attribute on the tool element. -->
7 </macros>
8 <xrefs>
9 <xref type="bio.tools">m6Anet</xref>
10 </xrefs>
11 <requirements>
12 <requirement type="package" version="@TOOL_VERSION@">m6anet</requirement>
13 </requirements>
14 <version_command>m6anet --version</version_command>
15 <!-- Two chained steps: 'm6anet dataprep' writes to ./dataprep_out, then 'm6anet inference' reads that directory and writes to ./inference_out. Both steps use $GALAXY_SLOTS processes (2 when the variable is unset). The steps are joined with the shell AND operator, so inference only runs when dataprep succeeds. --> <command detect_errors="exit_code"><![CDATA[
16 m6anet dataprep
17 --out_dir ./dataprep_out
18 --n_processes \${GALAXY_SLOTS:-2}
19 --eventalign '$eventalign'
20 --readcount_min $readcount_min
21 --readcount_max $readcount_max
22 &&
23 m6anet inference
24 --input_dir ./dataprep_out
25 --out_dir ./inference_out
26 --n_processes \${GALAXY_SLOTS:-2}
27 --num_iterations $num_iterations
28 --pretrained_model $pretrained_model
29 --read_proba_threshold $read_proba_threshold
30 --batch_size $batch_size
31 ]]></command>
32 <inputs> <!-- User-settable parameters; each argument attribute names the m6anet command-line option that receives the value. num_iterations requires at least one round because the reported probability is an average over sampling rounds. -->
33 <param argument="--eventalign" type="data" format="tabular" label="Nanopolish eventalign file as input"/>
34 <param argument="--pretrained_model" type="select" label="Name of the pre-trained model" help="Algorithm makes use of a pre-trained AI model, whose parameters are needed to process your data. Multiple sets of such parameters are available, and the default was obtained on HCT116 cell line.">
35 <option value="HCT116_RNA002" selected="true">HCT116 (default)</option>
36 <option value="arabidopsis_RNA002">Arabidopsis RNA002</option>
37 <option value="HEK293T_RNA004">HEK293T RNA004</option>
38 </param>
39 <param argument="--readcount_min" type="integer" value="1" min="1" label="Minimum read counts per gene" help="Used during pre-processing of nanopolish eventalign input."/>
40 <param argument="--readcount_max" type="integer" value="1000" min="1" label="Maximum read counts per gene" help="Used during pre-processing of nanopolish eventalign input."/>
41 <param argument="--num_iterations" type="integer" value="5" min="1" label="Number of sampling iterations to perform" help="m6Anet will sample 20 reads from each candidate site and average the probability of modification across several rounds of sampling according to this parameter."/>
42 <param argument="--batch_size" type="integer" value="64" min="1" label="Batch size" help="Number of sites to be loaded each time for inference"/>
43 <param argument="--read_proba_threshold" type="float" value="0.033379376" min="0" max="1" label="Probability threshold" help="Threshold for each individual read to be considered modified during stoichiometry calculation"/>
44 </inputs>
45 <outputs> <!-- Both datasets are collected from ./inference_out, the output directory of the inference step, and get their column_names metadata from the actions below. NOTE(review): the collected files are named *.csv and the tests check them with sep="," while format is "tabular"; confirm whether format="csv" is intended. -->
46 <data name="indiv_proba_csv" format="tabular" from_work_dir="./inference_out/data.indiv_proba.csv" label="${tool.name} on ${on_string}: read_probs">
47 <actions>
48 <action name="column_names" type="metadata"
49 default="transcript_id,transcript_position,read_index,probability_modified"/>
50 </actions>
51 </data>
52 <data name="site_proba_csv" format="tabular" from_work_dir="./inference_out/data.site_proba.csv" label="${tool.name} on ${on_string}: site_probs">
53 <actions>
54 <action name="column_names" type="metadata"
55 default="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
56 </actions>
57 </data>
58 </outputs>
59 <tests> <!-- Two runs on eventalign.txt: one with the default model and one with arabidopsis_RNA002. Literal dots in the patterns are escaped so that transcript versions and decimal points are matched exactly. NOTE(review): both tests assert the identical site_proba pattern for ENST00000499810.6 although they use different models; confirm this is expected. -->
60 <test expect_num_outputs="2">
61 <param name="eventalign" value="eventalign.txt"/>
62 <param name="readcount_min" value="20"/>
63 <param name="batch_size" value="256"/>
64 <output name="indiv_proba_csv">
65 <assert_contents>
66 <has_n_columns n="4" sep=","/>
67 <has_line line="transcript_id,transcript_position,read_index,probability_modified"/>
68 <has_text_matching expression="ENST00000222329\.8,2631,10.*,0\.2.*"/>
69 <has_text_matching expression="ENST00000523944\.5,3348,10.*,0\.2.*"/>
70 </assert_contents>
71 </output>
72 <output name="site_proba_csv" file="site_proba.csv" compare="sim_size">
73 <assert_contents>
74 <has_n_columns n="6" sep=","/>
75 <has_line line="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
76 <has_text_matching expression="ENST00000499810\.6,1901,90,0\.9.*,GGACT,0\.9.*"/>
77 <has_text_matching expression="ENST00000373365\.4,723,130,0\.9.*,GGACT,0\.7.*"/>
78 </assert_contents>
79 </output>
80 </test>
81 <!-- Same as above, but attempt to specify non-default model -->
82 <test expect_num_outputs="2">
83 <param name="eventalign" value="eventalign.txt"/>
84 <param name="pretrained_model" value="arabidopsis_RNA002"/>
85 <output name="indiv_proba_csv">
86 <assert_contents>
87 <has_n_columns n="4" sep=","/>
88 <has_line line="transcript_id,transcript_position,read_index,probability_modified"/>
89 <has_text_matching expression="ENST00000523944\.5,3348,.*,.*"/>
90 <has_text_matching expression="ENST00000499810\.6,1901,.*,.*"/>
91 </assert_contents>
92 </output>
93 <output name="site_proba_csv">
94 <assert_contents>
95 <has_n_columns n="6" sep=","/>
96 <has_line line="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
97 <has_text_matching expression="ENST00000499810\.6,1901,90,0\.9.*,GGACT,0\.9.*"/>
98 <has_text_matching expression="ENST00000311922\.3,546,31,0\.9.*,GGACT,0\.9.*"/>
99 </assert_contents>
100 </output>
101 </test>
102 </tests>
103 <help><![CDATA[
104 .. class:: infomark
105
106 **What it does**
107
108 m6Anet leverages a Multiple Instance Learning framework to detect m6A modifications from Nanopore Direct RNA Sequencing data.
109
110 To detect m6A modifications from your direct RNA sequencing sample, provide the tabular output of the nanopolish eventalign tool here. Behind the scenes, this tool first pre-processes the segmented raw signal file using 'm6anet dataprep' and then runs 'm6anet inference' on its output to assign modification probabilities to individual reads and to sites; these are returned to the history as two separate tables.
111
112 m6Anet will sample 20 reads from each candidate site and average the probability of modification across several rounds of sampling according to the 'num_iterations' parameter. Note that this is an ML-based model that can be trained on different datasets, thereby optimising for different organisms or nanopores. By default the tool uses model parameters obtained by training on the human HCT116 cell line; other options are available, and the results depend on which model parameters are used for the inference.
113
114
115 .. class:: infomark
116
117 **References**
118
119 More information is available on the `project website <https://m6anet.readthedocs.io/en/latest/>`_ and on the `GitHub repository <https://github.com/GoekeLab/m6anet>`_.
120 ]]>
121 </help>
122 <citations> <!-- Reference to cite when using this tool, given as a DOI. -->
123 <citation type="doi">10.1038/s41592-022-01666-1</citation>
124 </citations>
125 </tool>
126