Repository 'm6anet'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/m6anet

Changeset 0:40f186d91e67 (2023-10-25)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/m6anet commit cfa942e434b3c39e70c06cf4968e5472f5a1ce92
added:
m6anet.xml
test-data/eventalign.txt
test-data/indiv_proba.csv
test-data/site_proba.csv
b
diff -r 000000000000 -r 40f186d91e67 m6anet.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/m6anet.xml Wed Oct 25 07:12:45 2023 +0000
[
@@ -0,0 +1,126 @@
+<tool id="m6anet" name="m6anet" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>to detect m6A RNA modifications from nanopore data</description>
+    <!-- Version tokens substituted into the tool element's attributes and into the package requirement. -->
+    <macros>
+        <!-- Galaxy profile referenced by the tool's profile attribute -->
+        <token name="@PROFILE@">23.0</token>
+        <!-- m6anet release; also pins the package requirement below -->
+        <token name="@TOOL_VERSION@">2.1.0</token>
+        <!-- Wrapper revision, appended to the tool version after "+galaxy" -->
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <!-- Cross-reference to the bio.tools registry entry -->
+    <xrefs>
+        <xref type="bio.tools">m6Anet</xref>
+    </xrefs>
+    <!-- Single package dependency, pinned to the tool version token -->
+    <requirements>
+        <requirement version="@TOOL_VERSION@" type="package">m6anet</requirement>
+    </requirements>
+    <!-- Command whose output is recorded as the tool version -->
+    <version_command>m6anet --version</version_command>
+    <!--
+        Two chained m6anet steps: "dataprep" converts the eventalign table into
+        ./dataprep_out, then "inference" reads that directory and writes its
+        results to ./inference_out, from where the outputs section collects them.
+        Both steps take their process count from GALAXY_SLOTS, falling back to 2.
+        Non-numeric parameters (dataset path, select value) are single-quoted;
+        integer and float parameters are passed bare.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        m6anet dataprep
+            --out_dir ./dataprep_out
+            --n_processes \${GALAXY_SLOTS:-2}
+            --eventalign '$eventalign'
+            --readcount_min $readcount_min
+            --readcount_max $readcount_max
+        &&
+        m6anet inference
+            --input_dir ./dataprep_out
+            --out_dir ./inference_out
+            --n_processes \${GALAXY_SLOTS:-2}
+            --num_iterations $num_iterations
+            --pretrained_model '$pretrained_model'
+            --read_proba_threshold $read_proba_threshold
+            --batch_size $batch_size
+        ]]></command>
+    <!-- User-facing parameters. Each param's "argument" attribute matches the m6anet flag that receives its value in the command section. -->
+    <inputs>
+        <param argument="--eventalign" type="data" format="tabular" label="Nanopolish eventalign file as input"/>
+        <!-- The selected option value is handed to the inference step as the pre-trained model name -->
+        <param argument="--pretrained_model" type="select" label="Name of the pre-trained model" help="Algorithm makes use of a pre-trained AI model, whose parameters are needed to process your data. Multiple sets of such parameters are available, and the default was obtained on HCT116 cell line.">
+            <option value="HCT116_RNA002" selected="true">HCT116 (default)</option>
+            <option value="arabidopsis_RNA002">Arabidopsis RNA002</option>
+            <option value="HEK293T_RNA004">HEK293T RNA004</option>
+        </param>
+        <!-- Read-count bounds, consumed by the dataprep step -->
+        <param argument="--readcount_min" type="integer" value="1" min="1" label="Minimum read counts per gene" help="Used during pre-processing of nanopolish eventalign input."/>
+        <param argument="--readcount_max" type="integer" value="1000" min="1" label="Maximum read counts per gene" help="Used during pre-processing of nanopolish eventalign input."/>
+        <!-- Inference settings. num_iterations starts at 1: the reported probability is an average over the sampling rounds, so zero rounds is not a meaningful setting. -->
+        <param argument="--num_iterations" type="integer" value="5" min="1" label="Number of sampling iterations to perform" help="m6Anet will sample 20 reads from each candidate site and average the probability of modification across several rounds of sampling according to this parameter."/>
+        <param argument="--batch_size" type="integer" value="64" min="1" label="Batch size" help="Number of sites to be loaded each time for inference"/>
+        <param argument="--read_proba_threshold" type="float" value="0.033379376" min="0" max="1" label="Probability threshold" help="Threshold for each individual read to be considered modified during stoichiometry calculation"/>
+    </inputs>
+    <!--
+        Both result files are collected from the inference output directory and
+        get their column names set as metadata.
+        NOTE(review): the collected files have a .csv extension and the tests parse
+        them with sep="," while the declared format is "tabular"; confirm that this
+        datatype is intended.
+    -->
+    <outputs>
+        <!-- Per-read modification probabilities -->
+        <data name="indiv_proba_csv"
+              format="tabular"
+              from_work_dir="./inference_out/data.indiv_proba.csv"
+              label="${tool.name} on ${on_string}: read_probs">
+            <actions>
+                <action type="metadata" name="column_names" default="transcript_id,transcript_position,read_index,probability_modified"/>
+            </actions>
+        </data>
+        <!-- Per-site modification probabilities -->
+        <data name="site_proba_csv"
+              format="tabular"
+              from_work_dir="./inference_out/data.site_proba.csv"
+              label="${tool.name} on ${on_string}: site_probs">
+            <actions>
+                <action type="metadata" name="column_names" default="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
+            </actions>
+        </data>
+    </outputs>
+    <!--
+        Functional tests. Exact probabilities are not pinned; each test checks the
+        column count, the header line and a few rows by regular expression.
+        Dots in transcript versions and decimal numbers are escaped so that they
+        match a literal "." rather than any character.
+    -->
+    <tests>
+        <!-- Default model with a raised read-count minimum and a larger batch size -->
+        <test expect_num_outputs="2">
+            <param name="eventalign" value="eventalign.txt"/>
+            <param name="readcount_min" value="20"/>
+            <param name="batch_size" value="256"/>
+            <output name="indiv_proba_csv">
+                <assert_contents>
+                    <has_n_columns n="4" sep=","/>
+                    <has_line line="transcript_id,transcript_position,read_index,probability_modified"/>
+                    <has_text_matching expression="ENST00000222329\.8,2631,10.*,0\.2.*"/>
+                    <has_text_matching expression="ENST00000523944\.5,3348,10.*,0\.2.*"/>
+                </assert_contents>
+            </output>
+            <!-- Also compared by size against the stored site_proba.csv -->
+            <output name="site_proba_csv" file="site_proba.csv" compare="sim_size">
+                <assert_contents>
+                    <has_n_columns n="6" sep=","/>
+                    <has_line line="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
+                    <has_text_matching expression="ENST00000499810\.6,1901,90,0\.9.*,GGACT,0\.9.*"/>
+                    <has_text_matching expression="ENST00000373365\.4,723,130,0\.9.*,GGACT,0\.7.*"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Same input as above, but with a non-default pre-trained model and default remaining parameters -->
+        <test expect_num_outputs="2">
+            <param name="eventalign" value="eventalign.txt"/>
+            <param name="pretrained_model" value="arabidopsis_RNA002"/>
+            <output name="indiv_proba_csv">
+                <assert_contents>
+                    <has_n_columns n="4" sep=","/>
+                    <has_line line="transcript_id,transcript_position,read_index,probability_modified"/>
+                    <has_text_matching expression="ENST00000523944\.5,3348,.*,.*"/>
+                    <has_text_matching expression="ENST00000499810\.6,1901,.*,.*"/>
+                </assert_contents>
+            </output>
+            <output name="site_proba_csv">
+                <assert_contents>
+                    <has_n_columns n="6" sep=","/>
+                    <has_line line="transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio"/>
+                    <has_text_matching expression="ENST00000499810\.6,1901,90,0\.9.*,GGACT,0\.9.*"/>
+                    <has_text_matching expression="ENST00000311922\.3,546,31,0\.9.*,GGACT,0\.9.*"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+m6anet leverages a Multiple Instance Learning framework to detect m6A modifications from Nanopore Direct RNA Sequencing data.
+
+To detect m6A modifications from your direct RNA sequencing sample, provide the tabular output of the nanopolish eventalign tool here. Behind the scenes, this tool first pre-processes the segmented raw signal file using 'm6anet dataprep' and then runs 'm6anet inference' on its output to assign a probability of modification to each read and to each site. These per-read and per-site probabilities are returned to the history as two separate tabular datasets.
+
+m6Anet will sample 20 reads from each candidate site and average the probability of modification across several rounds of sampling, as set by the 'num_iterations' parameter. Note that this is an ML-based model that can be trained on different datasets, thereby optimising for different organisms or nanopores. By default the tool uses model parameters obtained by training on the human HCT116 cell line; other pre-trained models are available, and the results will depend on which model parameters are used for the inference.
+
+
+.. class:: infomark
+
+**References**
+
+More information is available on the `project website <https://m6anet.readthedocs.io/en/latest/>`_ and on the `GitHub repository <https://github.com/GoekeLab/m6anet>`_.
+]]>
+    </help>
+    <!-- Publication to cite for this tool, referenced by DOI. -->
+    <citations>
+        <citation type="doi">10.1038/s41592-022-01666-1</citation>
+    </citations>
+</tool>
+
b
diff -r 000000000000 -r 40f186d91e67 test-data/eventalign.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eventalign.txt Wed Oct 25 07:12:45 2023 +0000
b
b'@@ -0,0 +1,5000 @@\n+contig\tposition\treference_kmer\tread_index\tstrand\tevent_index\tevent_level_mean\tevent_stdv\tevent_length\tmodel_kmer\tmodel_mean\tmodel_stdv\tstandardized_level\tstart_idx\tend_idx\n+ENST00000361055.8\t551\tTGGAC\t82380\tt\t1093\t125.2\t2.364\t0.00299\tTGGAC\t118.05\t3.17\t2.07\t66904\t66913\n+ENST00000361055.8\t552\tGGACT\t82380\tt\t1094\t122.49\t4.523\t0.00764\tGGACT\t123.83\t4.79\t-0.26\t66881\t66904\n+ENST00000361055.8\t553\tGACTC\t82380\tt\t1095\t92.61\t2.74\t0.00498\tGACTC\t88.67\t2.73\t1.32\t66866\t66881\n+ENST00000361055.8\t546\tTGGAC\t82382\tt\t1388\t119.66\t3.6\t0.00332\tTGGAC\t118.05\t3.17\t0.47\t92966\t92976\n+ENST00000361055.8\t547\tGGACT\t82382\tt\t1389\t124.2\t8.115\t0.00498\tGGACT\t123.83\t4.79\t0.07\t92951\t92966\n+ENST00000361055.8\t548\tGACTG\t82382\tt\t1390\t89.41\t6.294\t0.00266\tGACTG\t88.63\t2.73\t0.26\t92943\t92951\n+ENST00000361055.8\t551\tTGGAC\t82382\tt\t1397\t122.52\t2.604\t0.01096\tTGGAC\t118.05\t3.17\t1.31\t92839\t92872\n+ENST00000361055.8\t552\tGGACT\t82382\tt\t1398\t123.01\t4.883\t0.0073\tGGACT\t123.83\t4.79\t-0.16\t92817\t92839\n+ENST00000361055.8\t553\tGACTC\t82382\tt\t1399\t91.8\t1.752\t0.00266\tGACTC\t88.67\t2.73\t1.07\t92809\t92817\n+ENST00000361055.8\t546\tTGGAC\t82383\tt\t1016\t121.96\t2.415\t0.01195\tTGGAC\t118.05\t3.17\t1.09\t53320\t53356\n+ENST00000361055.8\t547\tGGACT\t82383\tt\t1017\t124.89\t11.208\t0.01295\tGGACT\t123.83\t4.79\t0.2\t53281\t53320\n+ENST00000361055.8\t548\tGACTG\t82383\tt\t1023\t91.82\t1.534\t0.00398\tGACTG\t88.63\t2.73\t1.04\t53219\t53231\n+ENST00000361055.8\t551\tTGGAC\t82384\tt\t1063\t120.45\t8.426\t0.01328\tTGGAC\t118.05\t3.17\t0.68\t68346\t68386\n+ENST00000361055.8\t552\tGGACT\t82384\tt\t1064\t129.55\t1.564\t0.00299\tGGACT\t123.83\t4.79\t1.07\t68337\t68346\n+ENST00000361055.8\t553\tGACTC\t82384\tt\t1065\t94.19\t2.967\t0.00664\tGACTC\t88.67\t2.73\t1.81\t68317\t68337\n+ENST00000361055.8\t551\tTGGAC\t82386\tt\t888\t122.79\t3.01\t0.00531\tTGGAC\t118.05\t3.17\t1.33\t62973\t62989\
n+ENST00000361055.8\t552\tGGACT\t82386\tt\t889\t131.81\t0.67\t0.00332\tGGACT\t123.83\t4.79\t1.48\t62963\t62973\n+ENST00000361055.8\t553\tGACTC\t82386\tt\t890\t96.07\t3.389\t0.00398\tGACTC\t88.67\t2.73\t2.4\t62951\t62963\n+ENST00000361055.8\t546\tTGGAC\t82387\tt\t1157\t128.15\t2.377\t0.00498\tTGGAC\t118.05\t3.17\t2.92\t54921\t54936\n+ENST00000361055.8\t547\tGGACT\t82387\tt\t1158\t111.45\t16.305\t0.00332\tGGACT\t123.83\t4.79\t-2.37\t54911\t54921\n+ENST00000361055.8\t548\tGACTG\t82387\tt\t1160\t90.55\t2.091\t0.00232\tGACTG\t88.63\t2.73\t0.64\t54891\t54898\n+ENST00000361055.8\t551\tTGGAC\t82387\tt\t1163\t122.5\t1.96\t0.00365\tTGGAC\t118.05\t3.17\t1.29\t54847\t54858\n+ENST00000361055.8\t552\tGGACT\t82387\tt\t1164\t122.5\t9.402\t0.00764\tGGACT\t123.83\t4.79\t-0.26\t54824\t54847\n+ENST00000361055.8\t553\tGACTC\t82387\tt\t1165\t92.39\t3.918\t0.00564\tGACTC\t88.67\t2.73\t1.25\t54807\t54824\n+ENST00000361055.8\t546\tTGGAC\t82388\tt\t1107\t119.11\t6.171\t0.00398\tTGGAC\t118.05\t3.17\t0.3\t67702\t67714\n+ENST00000361055.8\t547\tGGACT\t82388\tt\t1108\t126.93\t8.95\t0.00398\tGGACT\t123.83\t4.79\t0.58\t67690\t67702\n+ENST00000361055.8\t548\tGACTG\t82388\tt\t1109\t91.17\t2.58\t0.00896\tGACTG\t88.63\t2.73\t0.83\t67663\t67690\n+ENST00000361055.8\t551\tTGGAC\t82388\tt\t1118\t107.81\t9.756\t0.00232\tTGGAC\t118.05\t3.17\t-2.89\t67544\t67551\n+ENST00000361055.8\t552\tGGACT\t82388\tt\t1119\t124.95\t7.024\t0.01826\tGGACT\t123.83\t4.79\t0.21\t67489\t67544\n+ENST00000361055.8\t553\tGACTC\t82388\tt\t1120\t91.22\t2.41\t0.01295\tGACTC\t88.67\t2.73\t0.84\t67450\t67489\n+ENST00000361055.8\t551\tTGGAC\t82389\tt\t1088\t111.07\t2.746\t0.00863\tTGGAC\t118.05\t3.17\t-2.06\t69287\t69313\n+ENST00000361055.8\t552\tGGACT\t82389\tt\t1089\t125.97\t2.765\t0.00764\tGGACT\t123.83\t4.79\t0.42\t69264\t69287\n+ENST00000361055.8\t553\tGACTC\t82389\tt\t1090\t93.75\t6.801\t0.00398\tGACTC\t88.67\t2.73\t1.74\t69252\t69264\n+ENST00000361055.8\t546\tTGGAC\t82390\tt\t1368\t120.66\t2.666\t0.00996\tTGGAC\t118.05\t3.17\t0.7
4\t66154\t66184\n+ENST00000361055.8\t547\tGGACT\t82390\tt\t1369\t125.01\t6.486\t0.00498\tGGACT\t123.83\t4.79\t0.22\t66139\t66154\n+ENST00000361055.8\t548\tGACTG\t82390\tt\t1370\t93.43\t1.678\t0.00465\tGACTG\t88.63\t2.73\t1.58\t66125\t66139\n+ENST00000361055.8\t551\tTGGAC\t82390\tt\t1377\t124.71\t3.111\t0.01494\tTGGAC\t118.05\t3.17\t1.89\t65996\t66041\n+ENST00000361055.8\t552\tGGACT\t82390\tt\t1378\t110.49\t14.92\t0.00432\tGGACT\t123.83\t4.79\t-2.51\t65983\t65996\n+ENST00000361055.8\t553\tGACTC\t82390\tt\t1379\t90.95\t3.285\t0.00797\tGACTC\t88.67\t2.73\t0.75\t65959\t65983\n+ENST00000361055.8\t546\t'..b'\t86499\n+ENST00000298292.12\t2608\tTGGAC\t329462\tt\t2954\t116.41\t1.832\t0.00531\tTGGAC\t118.05\t3.17\t-0.48\t30787\t30803\n+ENST00000298292.12\t2609\tGGACT\t329462\tt\t2955\t115.48\t6.015\t0.0073\tGGACT\t123.83\t4.79\t-1.63\t30765\t30787\n+ENST00000298292.12\t2610\tGACTT\t329462\tt\t2956\t87.37\t2.696\t0.0073\tGACTT\t89.12\t2.73\t-0.6\t30743\t30765\n+ENST00000298292.12\t2608\tTGGAC\t329464\tt\t3877\t121.11\t3.9\t0.00564\tTGGAC\t118.05\t3.17\t0.84\t40560\t40577\n+ENST00000298292.12\t2609\tGGACT\t329464\tt\t3878\t120.71\t7.061\t0.00398\tGGACT\t123.83\t4.79\t-0.57\t40548\t40560\n+ENST00000298292.12\t2610\tGACTT\t329464\tt\t3880\t90.44\t1.292\t0.00299\tGACTT\t89.12\t2.73\t0.42\t40530\t40539\n+ENST00000298292.12\t2608\tTGGAC\t329465\tt\t2908\t120.58\t1.285\t0.00299\tTGGAC\t118.05\t3.17\t0.71\t29573\t29582\n+ENST00000298292.12\t2609\tGGACT\t329465\tt\t2909\t118.44\t1.96\t0.00332\tGGACT\t123.83\t4.79\t-1.0\t29563\t29573\n+ENST00000298292.12\t2610\tGACTT\t329465\tt\t2910\t86.21\t2.385\t0.00332\tGACTT\t89.12\t2.73\t-0.95\t29553\t29563\n+ENST00000298292.12\t2608\tTGGAC\t329466\tt\t3324\t119.17\t0.844\t0.00299\tTGGAC\t118.05\t3.17\t0.33\t38101\t38110\n+ENST00000298292.12\t2609\tGGACT\t329466\tt\t3325\t120.65\t4.238\t0.00266\tGGACT\t123.83\t4.79\t-0.61\t38093\t38101\n+ENST00000298292.12\t2610\tGACTT\t329466\tt\t3326\t89.53\t2.207\t0.00564\tGACTT\t89.12\t2.73\t0.14\t38076\t38093
\n+ENST00000298292.12\t2608\tTGGAC\t329468\tt\t1181\t116.21\t4.706\t0.00631\tTGGAC\t118.05\t3.17\t-0.53\t19779\t19798\n+ENST00000298292.12\t2609\tGGACT\t329468\tt\t1182\t119.84\t4.968\t0.00299\tGGACT\t123.83\t4.79\t-0.77\t19770\t19779\n+ENST00000298292.12\t2610\tGACTT\t329468\tt\t1183\t90.21\t2.87\t0.0073\tGACTT\t89.12\t2.73\t0.37\t19748\t19770\n+ENST00000298292.12\t2608\tTGGAC\t329470\tt\t2414\t116.69\t2.071\t0.0239\tTGGAC\t118.05\t3.17\t-0.42\t45231\t45303\n+ENST00000298292.12\t2609\tGGACT\t329470\tt\t2415\t119.49\t3.73\t0.00797\tGGACT\t123.83\t4.79\t-0.89\t45207\t45231\n+ENST00000298292.12\t2610\tGACTT\t329470\tt\t2416\t89.49\t2.415\t0.01162\tGACTT\t89.12\t2.73\t0.13\t45172\t45207\n+ENST00000227507.2\t22\tCGGAC\t330123\tt\t87\t115.94\t1.87\t0.0073\tCGGAC\t120.9\t3.17\t-1.47\t216115\t216137\n+ENST00000227507.2\t23\tGGACT\t330123\tt\t88\t117.68\t6.089\t0.01394\tGGACT\t123.83\t4.79\t-1.21\t216073\t216115\n+ENST00000227507.2\t24\tGACTA\t330123\tt\t89\t86.38\t5.371\t0.00299\tGACTA\t90.24\t2.73\t-1.33\t216064\t216073\n+ENST00000227507.2\t3661\tAGGAC\t330123\tt\t8581\t117.63\t5.84\t0.00697\tAGGAC\t115.92\t3.17\t0.51\t46120\t46141\n+ENST00000227507.2\t3662\tGGACT\t330123\tt\t8582\t115.53\t5.358\t0.0083\tGGACT\t123.83\t4.79\t-1.63\t46095\t46120\n+ENST00000227507.2\t3663\tGACTC\t330123\tt\t8583\t85.04\t1.656\t0.00465\tGACTC\t88.67\t2.73\t-1.25\t46081\t46095\n+ENST00000227507.2\t948\tCGGAC\t330124\tt\t1932\t127.58\t3.676\t0.00398\tCGGAC\t120.9\t3.17\t1.91\t147725\t147737\n+ENST00000227507.2\t949\tGGACT\t330124\tt\t1933\t121.41\t10.654\t0.01262\tGGACT\t123.83\t4.79\t-0.46\t147687\t147725\n+ENST00000227507.2\t950\tGACTG\t330124\tt\t1934\t91.04\t3.251\t0.00398\tGACTG\t88.63\t2.73\t0.8\t147675\t147687\n+ENST00000227507.2\t3661\tAGGAC\t330124\tt\t7711\t113.86\t1.86\t0.00266\tAGGAC\t115.92\t3.17\t-0.59\t35282\t35290\n+ENST00000227507.2\t3662\tGGACT\t330124\tt\t7712\t113.2\t8.007\t0.0249\tGGACT\t123.83\t4.79\t-2.02\t35207\t35282\n+ENST00000227507.2\t3663\tGACTC\t330124\tt\t7713\t8
7.53\t1.343\t0.00631\tGACTC\t88.67\t2.73\t-0.38\t35188\t35207\n+ENST00000227507.2\t2238\tAGGAC\t330125\tt\t4558\t124.21\t3.677\t0.01262\tAGGAC\t115.92\t3.17\t2.32\t99334\t99372\n+ENST00000227507.2\t2239\tGGACT\t330125\tt\t4559\t114.76\t11.442\t0.00332\tGGACT\t123.83\t4.79\t-1.68\t99324\t99334\n+ENST00000227507.2\t2240\tGACTT\t330125\tt\t4560\t91.83\t1.617\t0.00564\tGACTT\t89.12\t2.73\t0.88\t99307\t99324\n+ENST00000227507.2\t3661\tAGGAC\t330125\tt\t7555\t110.57\t3.92\t0.00797\tAGGAC\t115.92\t3.17\t-1.49\t39703\t39727\n+ENST00000227507.2\t3662\tGGACT\t330125\tt\t7557\t111.91\t7.176\t0.00631\tGGACT\t123.83\t4.79\t-2.2\t39665\t39684\n+ENST00000227507.2\t3663\tGACTC\t330125\tt\t7558\t87.08\t3.05\t0.00398\tGACTC\t88.67\t2.73\t-0.52\t39653\t39665\n+ENST00000227507.2\t3661\tAGGAC\t330126\tt\t8945\t106.12\t2.861\t0.00232\tAGGAC\t115.92\t3.17\t-2.8\t38760\t38767\n+ENST00000227507.2\t3662\tGGACT\t330126\tt\t8946\t119.23\t10.071\t0.0093\tGGACT\t123.83\t4.79\t-0.87\t38732\t38760\n+ENST00000227507.2\t3663\tGACTC\t330126\tt\t8947\t89.3\t2.193\t0.00531\tGACTC\t88.67\t2.73\t0.21\t38716\t38732\n+ENST00000227507.2\t948\tCGGAC\t330128\tt\t1641\t120.92\t13.026\t0.01228\tCGGAC\t120.9\t3.17\t0.01\t117696\t117733\n'
b
diff -r 000000000000 -r 40f186d91e67 test-data/indiv_proba.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/indiv_proba.csv Wed Oct 25 07:12:45 2023 +0000
b
b'@@ -0,0 +1,5596 @@\n+transcript_id,transcript_position,read_index,probability_modified\n+ENST00000393394.5,130,966210,0.002652896801009774\n+ENST00000393394.5,130,966138,0.0004641234700102359\n+ENST00000393394.5,130,966140,0.00012340281682554632\n+ENST00000393394.5,130,966143,4.147307845414616e-05\n+ENST00000393394.5,130,966145,0.006463199388235807\n+ENST00000393394.5,130,966802,3.054723947570892e-06\n+ENST00000393394.5,130,966147,0.0003418931155465543\n+ENST00000393394.5,130,966795,0.003732140641659498\n+ENST00000393394.5,130,966148,0.00012009570491500199\n+ENST00000393394.5,130,966136,0.00031356114777736366\n+ENST00000393394.5,130,966150,0.007453507278114557\n+ENST00000393394.5,130,966154,0.1671844869852066\n+ENST00000393394.5,130,966792,0.006686098873615265\n+ENST00000393394.5,130,966157,0.0015339127276092768\n+ENST00000393394.5,130,966791,0.0002678607706911862\n+ENST00000393394.5,130,966158,0.0002013254415942356\n+ENST00000393394.5,130,966160,1.7140560885309242e-05\n+ENST00000393394.5,130,966161,0.0002337907353648916\n+ENST00000393394.5,130,966789,0.0020310278050601482\n+ENST00000393394.5,130,966153,0.0033567906357347965\n+ENST00000393394.5,130,966133,0.050693295896053314\n+ENST00000393394.5,130,966132,0.00013980608491692692\n+ENST00000393394.5,130,966131,0.0010007641976699233\n+ENST00000393394.5,130,966094,0.01789756305515766\n+ENST00000393394.5,130,966095,1.1837071269837907e-06\n+ENST00000393394.5,130,966819,0.04211675748229027\n+ENST00000393394.5,130,966098,0.0019571296870708466\n+ENST00000393394.5,130,966100,0.0007573427283205092\n+ENST00000393394.5,130,966101,1.9759438600885915e-06\n+ENST00000393394.5,130,966103,0.000337231089361012\n+ENST00000393394.5,130,966816,0.003276352072134614\n+ENST00000393394.5,130,966106,0.02229730598628521\n+ENST00000393394.5,130,966108,0.00011804768291767687\n+ENST00000393394.5,130,966109,0.004094226751476526\n+ENST00000393394.5,130,966110,8.353911835001782e-05\n+ENST00000393394.5,130,966111,2.657330514921341e-05\n+ENST0000039
3394.5,130,966114,2.9840937713743187e-05\n+ENST00000393394.5,130,966115,0.005843206308782101\n+ENST00000393394.5,130,966124,0.00021711693261750042\n+ENST00000393394.5,130,966125,8.410857117269188e-05\n+ENST00000393394.5,130,966815,0.09812656044960022\n+ENST00000393394.5,130,966810,0.09258224815130234\n+ENST00000393394.5,130,966164,0.0008906738366931677\n+ENST00000393394.5,130,966779,0.01233267318457365\n+ENST00000393394.5,130,966165,8.327077921421733e-06\n+ENST00000393394.5,130,966166,0.004988881293684244\n+ENST00000393394.5,130,966220,0.0317571721971035\n+ENST00000393394.5,130,966221,0.0024086241610348225\n+ENST00000393394.5,130,966223,0.047890253365039825\n+ENST00000393394.5,130,966224,0.00021106710482854396\n+ENST00000393394.5,130,966763,0.0002595328551251441\n+ENST00000393394.5,130,966228,0.0016289303312078118\n+ENST00000393394.5,130,966762,2.532245525799226e-05\n+ENST00000393394.5,130,966231,6.542351184180006e-05\n+ENST00000393394.5,130,966232,0.000732739339582622\n+ENST00000393394.5,130,966233,3.8470850995508954e-05\n+ENST00000393394.5,130,966234,0.0015706156846135855\n+ENST00000393394.5,130,966235,0.009445944800972939\n+ENST00000393394.5,130,966236,0.005618339404463768\n+ENST00000393394.5,130,966237,0.00034666300052776933\n+ENST00000393394.5,130,966238,0.005356188863515854\n+ENST00000393394.5,130,966239,6.870560355309863e-06\n+ENST00000393394.5,130,966243,1.1203931116199328e-08\n+ENST00000393394.5,130,966245,0.00022200003149919212\n+ENST00000393394.5,130,966758,3.608944462030195e-05\n+ENST00000393394.5,130,966217,0.0026539729442447424\n+ENST00000393394.5,130,966825,0.0752301886677742\n+ENST00000393394.5,130,966215,2.798397690639831e-05\n+ENST00000393394.5,130,966213,0.0010050226701423526\n+ENST00000393394.5,130,966778,0.0038017502520233393\n+ENST00000393394.5,130,966171,0.00039351574378088117\n+ENST00000393394.5,130,966172,0.005401217844337225\n+ENST00000393394.5,130,966178,0.00047704760800115764\n+ENST00000393394.5,130,966776,6.029185533407144e-05\n+ENST0000
0393394.5,130,966183,0.0001155152713181451\n+ENST00000393394.5,130,966186,0.00'..b'ENST00000626223.2,1417,962133,2.15000327443704e-05\n+ENST00000626223.2,1417,962121,0.0006923539331182837\n+ENST00000626223.2,1417,962123,0.003455337369814515\n+ENST00000626223.2,1417,962124,0.0013730002101510763\n+ENST00000626223.2,1417,962125,0.00048559371498413384\n+ENST00000626223.2,1417,962126,0.07292302697896957\n+ENST00000626223.2,1417,962132,0.00013039512850809842\n+ENST00000626223.2,1417,962128,0.005897011607885361\n+ENST00000626223.2,1417,962111,0.0030634941067546606\n+ENST00000626223.2,1417,962088,0.0016143042594194412\n+ENST00000626223.2,1417,962091,0.29848265647888184\n+ENST00000626223.2,1417,962086,0.0021648965775966644\n+ENST00000626223.2,1417,962052,0.295176237821579\n+ENST00000626223.2,1417,962053,3.9781505620339885e-05\n+ENST00000626223.2,1417,962055,0.41697198152542114\n+ENST00000626223.2,1417,962056,0.019609948620200157\n+ENST00000626223.2,1417,962057,0.28471776843070984\n+ENST00000626223.2,1417,962058,0.0015051751397550106\n+ENST00000626223.2,1417,962060,0.03407472372055054\n+ENST00000626223.2,1417,962064,0.02325056493282318\n+ENST00000626223.2,1417,962067,0.0027014510706067085\n+ENST00000626223.2,1417,962070,0.0012049301294609904\n+ENST00000626223.2,1417,962071,0.023938270285725594\n+ENST00000626223.2,1417,962156,0.06799466907978058\n+ENST00000626223.2,1417,962154,2.1490004655788653e-06\n+ENST00000626223.2,1417,962087,0.18999730050563812\n+ENST00000626223.2,1417,962075,0.0008427370339632034\n+ENST00000626223.2,1417,962130,0.00032248723437078297\n+ENST00000626223.2,1417,962149,0.016086382791399956\n+ENST00000626223.2,1417,962144,0.004423717502504587\n+ENST00000626223.2,1417,962085,6.961916369618848e-05\n+ENST00000626223.2,1417,962084,0.0032535488717257977\n+ENST00000626223.2,1417,962082,0.0001539468066766858\n+ENST00000626223.2,1417,962080,0.014471097849309444\n+ENST00000626223.2,1417,962079,0.001917013549245894\n+ENST00000626223.2,1417,962145,0.0004666765453293919
6\n+ENST00000626223.2,1417,962078,0.00018249584536533803\n+ENST00000626223.2,1417,962077,1.5058026292535942e-05\n+ENST00000626223.2,1417,962148,0.0006507554207928479\n+ENST00000626223.2,1746,962133,0.07593128830194473\n+ENST00000626223.2,1746,962147,0.002669559558853507\n+ENST00000626223.2,1746,962149,0.1832713782787323\n+ENST00000626223.2,1746,962148,0.23597103357315063\n+ENST00000626223.2,1746,962139,0.1780920773744583\n+ENST00000626223.2,1746,962132,0.06618550419807434\n+ENST00000626223.2,1746,962142,0.2600573003292084\n+ENST00000626223.2,1746,962159,0.00046879504225216806\n+ENST00000626223.2,1746,962131,0.4001924991607666\n+ENST00000626223.2,1746,962150,0.04278097301721573\n+ENST00000626223.2,1746,962130,0.0009542864863760769\n+ENST00000626223.2,1746,962136,0.39377492666244507\n+ENST00000626223.2,1746,962051,0.2850414216518402\n+ENST00000626223.2,1746,962127,0.11620774865150452\n+ENST00000626223.2,1746,962052,0.006599955260753632\n+ENST00000626223.2,1746,962072,0.0345349945127964\n+ENST00000626223.2,1746,962073,0.3622717559337616\n+ENST00000626223.2,1746,962074,0.36883318424224854\n+ENST00000626223.2,1746,962075,0.0009727009455673397\n+ENST00000626223.2,1746,962076,0.008389758877456188\n+ENST00000626223.2,1746,962077,0.26213398575782776\n+ENST00000626223.2,1746,962078,0.053079329431056976\n+ENST00000626223.2,1746,962079,0.008349576964974403\n+ENST00000626223.2,1746,962081,0.00020545579900499433\n+ENST00000626223.2,1746,962085,0.2794467806816101\n+ENST00000626223.2,1746,962128,0.000362567778211087\n+ENST00000626223.2,1746,962090,2.4779433260846417e-06\n+ENST00000626223.2,1746,962101,0.28540146350860596\n+ENST00000626223.2,1746,962103,0.000519226654432714\n+ENST00000626223.2,1746,962107,0.21877337992191315\n+ENST00000626223.2,1746,962111,0.027107303962111473\n+ENST00000626223.2,1746,962113,0.27482685446739197\n+ENST00000626223.2,1746,962115,0.41422274708747864\n+ENST00000626223.2,1746,962117,0.03614037483930588\n+ENST00000626223.2,1746,962119,0.3651837408542633\n+
ENST00000626223.2,1746,962124,0.0016373113030567765\n+ENST00000626223.2,1746,962126,0.31790292263031006\n+ENST00000626223.2,1746,962091,0.0003424300521146506\n'
b
diff -r 000000000000 -r 40f186d91e67 test-data/site_proba.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/site_proba.csv Wed Oct 25 07:12:45 2023 +0000
b
@@ -0,0 +1,102 @@
+transcript_id,transcript_position,n_reads,probability_modified,kmer,mod_ratio
+ENST00000393394.5,130,662,0.2202581316232681,GGACT,0.0981873111782477
+ENST00000393394.5,234,441,0.2501963078975677,GGACT,0.0907029478458050
+ENST00000222329.8,2631,23,0.3567893207073212,GGACT,0.1304347826086956
+ENST00000523944.5,3348,30,0.5557191371917725,GGACT,0.2333333333333333
+ENST00000262105.6,2540,20,0.3931991159915924,GGACT,0.2000000000000000
+ENST00000262105.6,3426,29,0.6103772521018982,GGACT,0.2068965517241379
+ENST00000394670.8,2043,41,0.7260062098503113,GGACT,0.3658536585365854
+ENST00000499810.6,1901,90,0.9853399395942688,GGACT,0.9222222222222223
+ENST00000499810.6,2750,24,0.2590899467468262,GGACT,0.1250000000000000
+ENST00000499810.6,3008,37,0.7827798128128052,GGACT,0.5135135135135135
+ENST00000373365.4,723,130,0.9848441481590271,GGACT,0.7692307692307693
+ENST00000373365.4,1205,76,0.6978975534439087,GGACT,0.2894736842105263
+ENST00000380668.9,2071,23,0.4891549348831177,GGACT,0.2173913043478261
+ENST00000265748.6,3272,20,0.4029413163661957,GGACT,0.1000000000000000
+ENST00000368097.8,1308,49,0.4595046341419220,GGACT,0.2448979591836735
+ENST00000311922.3,273,36,0.9890720844268799,GGACT,0.9166666666666666
+ENST00000311922.3,352,31,0.9165738224983215,GGACT,0.5161290322580645
+ENST00000311922.3,546,31,0.9603326916694641,GGACT,0.8709677419354839
+ENST00000311922.3,754,31,0.5318874716758728,GGACT,0.2580645161290323
+ENST00000311922.3,1069,40,0.9072993993759155,GGACT,0.6750000000000000
+ENST00000311922.3,3406,89,0.9774743914604187,GGACT,0.8089887640449438
+ENST00000311922.3,3498,40,0.9874091744422913,GGACT,0.7750000000000000
+ENST00000338825.4,330,65,0.1943143606185913,GGACT,0.0923076923076923
+ENST00000296412.12,838,65,0.4225453138351440,GGACT,0.1384615384615385
+ENST00000340384.4,368,66,0.4107578098773956,GGACT,0.1818181818181818
+ENST00000340384.4,488,108,0.8754017353057861,GGACT,0.4537037037037037
+ENST00000340384.4,574,104,0.9210290908813477,GGACT,0.5865384615384616
+ENST00000457309.2,1885,24,0.7738892436027527,GGACT,0.4166666666666667
+ENST00000298292.12,2611,25,0.9342290163040161,GGACT,0.9200000000000000
+ENST00000227507.2,2241,33,0.6662369370460510,GGACT,0.2727272727272727
+ENST00000227507.2,3664,108,0.9251167178153992,GGACT,0.5277777777777778
+ENST00000350028.4,1595,24,0.9505048990249634,GGACT,0.6250000000000000
+ENST00000374595.8,166,20,0.5066616535186768,GGACT,0.3500000000000000
+ENST00000374595.8,1071,23,0.2806355655193329,GGACT,0.1304347826086956
+ENST00000374595.8,1786,26,0.2028520256280899,GGACT,0.0769230769230769
+ENST00000262225.7,1321,87,0.6040331125259399,GGACT,0.2413793103448276
+ENST00000450253.6,2023,44,0.7291089892387390,GGACT,0.2954545454545455
+ENST00000450253.6,2088,27,0.8862818479537964,GGACT,0.4814814814814815
+ENST00000450253.6,2269,84,0.9864088892936707,GGACT,0.9047619047619048
+ENST00000372839.7,1532,41,0.9585621356964111,GGACT,0.6097560975609756
+ENST00000337392.9,558,28,0.2025025784969330,GGACT,0.0714285714285714
+ENST00000352482.8,1333,26,0.9877531528472900,GGACT,0.9615384615384616
+ENST00000352482.8,1395,28,0.9343129992485046,GGACT,0.5714285714285714
+ENST00000340513.4,3033,42,0.9952387809753418,GGACT,1.0000000000000000
+ENST00000232888.6,1374,29,0.3711816370487213,GGACT,0.1034482758620690
+ENST00000258975.6,951,27,0.7738332748413086,GGACT,0.3333333333333333
+ENST00000251413.7,264,40,0.6202264428138733,GGACT,0.3000000000000000
+ENST00000251413.7,505,30,0.2851565778255463,GGACT,0.1333333333333333
+ENST00000251413.7,1534,77,0.9868885874748230,GGACT,0.8831168831168831
+ENST00000258383.3,699,20,0.6646413207054138,GGACT,0.2000000000000000
+ENST00000258383.3,1316,20,0.1779935210943222,GGACT,0.0500000000000000
+ENST00000367142.4,481,27,0.2336638718843460,GGACT,0.0370370370370370
+ENST00000367142.4,1510,26,0.9076765775680542,GGACT,0.7307692307692307
+ENST00000373062.7,1210,27,0.2000236809253693,GGACT,0.0740740740740741
+ENST00000373062.7,1383,31,0.5209900736808777,GGACT,0.2580645161290323
+ENST00000373062.7,1979,23,0.4619059562683105,GGACT,0.2608695652173913
+ENST00000405805.5,2415,28,0.7868381142616272,GGACT,0.3571428571428572
+ENST00000356000.7,1140,32,0.9578318595886230,GGACT,0.8125000000000000
+ENST00000617731.2,464,185,0.1546680480241776,GGACT,0.0756756756756757
+ENST00000354675.10,6112,23,0.9841457009315491,GGACT,0.7826086956521739
+ENST00000367627.7,2024,33,0.9947395324707031,GGACT,0.9696969696969697
+ENST00000367627.7,2048,27,0.9935660958290100,GGACT,0.8888888888888888
+ENST00000519065.5,261,27,0.6987373232841492,GGACT,0.2222222222222222
+ENST00000519065.5,1951,57,0.9731091856956482,GGACT,0.8596491228070176
+ENST00000216129.6,1091,23,0.8455310463905334,GGACT,0.3043478260869565
+ENST00000432462.6,526,37,0.7197095751762390,GGACT,0.3783783783783784
+ENST00000326427.10,430,27,0.3374719619750977,GGACT,0.1111111111111111
+ENST00000326427.10,453,22,0.2899298071861267,GGACT,0.2272727272727273
+ENST00000368149.2,1250,21,0.5231421589851379,GGACT,0.2857142857142857
+ENST00000368149.2,2112,46,0.9528524279594421,GGACT,0.6739130434782609
+ENST00000368149.2,2165,32,0.9834513664245605,GGACT,0.6562500000000000
+ENST00000368149.2,2737,33,0.9637023210525513,GGACT,0.6969696969696970
+ENST00000254799.10,585,35,0.2712623178958893,GGACT,0.1428571428571428
+ENST00000254799.10,885,61,0.2885415554046631,GGACT,0.1147540983606557
+ENST00000254799.10,933,21,0.7477120161056519,GGACT,0.4285714285714285
+ENST00000254799.10,965,45,0.3009000420570374,GGACT,0.1111111111111111
+ENST00000254799.10,1625,53,0.7655946016311646,GGACT,0.3962264150943396
+ENST00000254799.10,1721,86,0.9900733232498169,GGACT,0.8953488372093024
+ENST00000215375.6,133,48,0.4071687459945679,GGACT,0.0833333333333333
+ENST00000215375.6,787,53,0.9324858188629150,GGACT,0.4905660377358491
+ENST00000248879.7,688,27,0.9644709825515747,GGACT,0.7037037037037037
+ENST00000248879.7,889,42,0.9571337103843689,GGACT,0.6666666666666666
+ENST00000335007.9,301,48,0.5563411712646484,GGACT,0.2083333333333333
+ENST00000335007.9,465,77,0.8410034179687500,GGACT,0.3766233766233766
+ENST00000335007.9,1190,106,0.9765372872352600,GGACT,0.7641509433962265
+ENST00000373232.7,662,172,0.3497287333011627,GGACT,0.1976744186046512
+ENST00000373232.7,740,126,0.2644290626049042,GGACT,0.1031746031746032
+ENST00000279281.7,141,25,0.4770812094211578,GGACT,0.2400000000000000
+ENST00000279281.7,1689,28,0.4453878402709961,GGACT,0.2142857142857143
+ENST00000279281.7,2030,31,0.1925014108419418,GGACT,0.0967741935483871
+ENST00000279281.7,2283,39,0.9835131764411926,GGACT,0.8717948717948718
+ENST00000221566.6,421,20,0.4708485007286072,GGACT,0.3000000000000000
+ENST00000221566.6,1188,35,0.9298388957977295,GGACT,0.6285714285714286
+ENST00000470450.5,1369,48,0.3350095450878143,GGACT,0.1041666666666667
+ENST00000396024.7,536,45,0.2646224200725555,GGACT,0.0888888888888889
+ENST00000634960.1,965,22,0.0539428219199181,GGACT,0.0000000000000000
+ENST00000634960.1,1360,24,0.6504774689674377,GGACT,0.3333333333333333
+ENST00000634960.1,1394,28,0.2388863712549210,GGACT,0.0714285714285714
+ENST00000634960.1,2171,29,0.2318001240491867,GGACT,0.1034482758620690
+ENST00000626223.2,1417,62,0.5923939347267151,GGACT,0.1935483870967742
+ENST00000626223.2,1746,38,0.9583113193511963,GGACT,0.6315789473684210