Repository 'cherri_eval'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/cherri_eval

Changeset 0:7c6c282ecf5a (2022-12-09)
Next changeset 1:5dd6b8c4ee1e (2022-12-12)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cherri commit f9348123725f421ddbdbd8d372d038da4880dbac
added:
all_fasta.loc.sample
cherri_eval.xml
macros.xml
test-data/context_150_model.tgz
test-data/context_150_st_off.npz
test-data/evaluation_results_st_on.csv
test-data/genome.fa
test-data/genome.fa.fai
test-data/genome.sizes
test-data/mixed_context_150_st_off.npz
test-data/test_evaluate_rris.csv
test-data/train_1.fa
test-data/train_1_len.tabular
test-data/train_1_pos.tabular
test-data/train_2.fa
test-data/train_2_len.tabular
test-data/train_2_pos.tabular
test-data/training_data_context_150_st_on.npz
test-data/training_data_mixed_context_100_st_on.npz
b
diff -r 000000000000 -r 7c6c282ecf5a all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r 7c6c282ecf5a cherri_eval.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cherri_eval.xml Fri Dec 09 17:40:27 2022 +0000
[
@@ -0,0 +1,97 @@
+<tool id="cherri_eval" name="Evaluation of RRIs using CheRRI" version="@VERSION@" profile="@PROFILE@">
+    <!-- Galaxy wrapper around "cherri eval" (see the command section): evaluates the RRIs of the input table with the model files unpacked from the supplied archive. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Pin PYTHONHASHSEED for every process started below (presumably for reproducible results).
+        export PYTHONHASHSEED=31337 &&
+        ## A cached genome is a data table entry, so the FASTA location is the entry's 'path' column;
+        ## a history genome is a dataset whose value already is a file path.
+        #if str($ref_source.ref_source_selector) == 'cached':
+            ln -s '$ref_source.genome_fasta.fields.path' genome.fa &&
+        #else
+            ln -s '$ref_source.genome_fasta' genome.fa &&
+        #end if
+        ## Unpack the model archive; cherri is pointed at the two files expected inside it.
+        mkdir model_dir &&
+        tar -C model_dir -xvf '$model_tar' > /dev/null &&
+        cherri eval
+        -i1 '$rris_table'
+        -g genome.fa
+        -l '$chrom_len_file'
+        -o .
+        -on cherri_eval
+        -c '$context'
+        -st $use_structure
+        -hf $hand_feat
+        -m 'model_dir/final_full.model'
+        -mp 'model_dir/features.npz'
+        #if $occupied_regions:
+            -i2 '$occupied_regions'
+        #end if
+        #if $intarna_param_file:
+            -p '$intarna_param_file'
+        #end if
+    ]]></command>
+    <inputs>
+        <param name="rris_table" type="data" format="csv" label="CSV file containing all RRIs that should be evaluated" />
+        <expand macro="reference_source_conditional"/>
+        <param name="model_tar" type="data" format="tgz" label="Model and feature files in a tar" />
+        <param name="context" type="integer" value="150" label="How much context should be added at up- and downstream of each sequence" />
+        <param name="use_structure" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Set 'off' if you want to disable structure, default 'on'" />
+        <param name="hand_feat" type="boolean" truevalue="on" falsevalue="off" checked="false" label="If you want to start from hand-curated feature files, use this for evaluating test set performance. Default: 'off'" />
+        <param name="occupied_regions" optional="True" type="data" format="binary" label="Occupied regions python object file containing a dictionary" />
+        <param name="intarna_param_file" optional="True" type="data" format="txt" label="IntaRNA parameters file" />
+    </inputs>
+    <outputs>
+        <!-- Collected from the directory named by "-on cherri_eval" below the working directory ("-o ."). -->
+        <data name="eval_out" format="csv" from_work_dir="cherri_eval/evaluation/evaluation_results_eval_rri.csv" label="Evaluation results on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="rris_table" value="test_evaluate_rris.csv"/>
+            <conditional name="ref_source">
+                <param name="ref_source_selector" value="history"/>
+                <param name="genome_fasta" value="genome.fa"/>
+            </conditional>
+            <param name="chrom_len_file" value="genome.sizes" />
+            <param name="model_tar" value="context_150_model.tgz" />
+            <output name="eval_out" file="evaluation_results_st_on.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+CheRRI detects functional RNA-RNA interaction (RRI) sites, by evaluating if an interaction site most likely occurs in nature. It helps to filter interaction sites generated either experimentally or by an RRI prediction algorithm, by removing false positive interactions.
+
+**Inputs**
+
+CheRRI takes a table of RNA-RNA interactions. It supports the output of ChiRA or a CSV file of interactions in the following format. The table needs the following header line:
+
+    `chrom1,start1,stop1,strand1,chrom2,start2,stop2,strand2`
+
+Following the header line, each subsequent line represents an RRI, with chromosome ID (format: 1,2,3 ...), interaction start, interaction end, and strand ("+" or "-") of the two interacting partners. For example, you might want to evaluate the following three RRI sites::
+
+    19,18307518,18307539,-,14,90454500,90454521,+
+    X,109054541,109054590,+,9,89178539,89178562,-
+    10,123136102,123136122,+,5,1245880,1245902,+
+
+The tool additionally takes a genome FASTA file and a chromosome lengths file. Users can select a built-in FASTA or a FASTA file from the history.
+A chromosome lengths file is a tabular file where each line contains a tab-separated chromosome name and its length. The following is an example::
+
+    chr1    23200231
+    chr2    12497572
+    chr3    8387920
+
+CheRRI also needs a model archive (tar/tgz) that contains the trained model (final_full.model) and the feature file (features.npz). It can be generated by using the CheRRI train tool.
+
+**Outputs**
+
+This tool outputs a CSV file containing predictions for each input interaction.
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 7c6c282ecf5a macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Dec 09 17:40:27 2022 +0000
[
@@ -0,0 +1,64 @@
+<macros>
+    <!-- Macros and tokens imported by cherri_eval.xml via <import>macros.xml</import>. -->
+    <token name="@VERSION@">0.7</token>
+    <token name="@PROFILE@">21.05</token>
+    <!-- Reference genome chooser (entry of the all_fasta data table or a FASTA dataset from the history) followed by the chromosome lengths input. -->
+    <macro name="reference_source_conditional">
+        <conditional name="ref_source">
+            <param name="ref_source_selector" type="select" label="Reference genome in FASTA format">
+                <option value="cached">locally cached</option>
+                <option value="history">in your history</option>
+            </param>
+            <when value="cached">
+                <param name="genome_fasta" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                        <validator type="no_options" message="No indices are available." />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Select a reference dataset in FASTA format" />
+            </when>
+        </conditional>
+        <param name="chrom_len_file" type="data" format="tabular" label="Two column tabular file containing chromosome lengths" help="Format: 'chrom name' \t 'chrom length'" />
+    </macro>
+    <!-- Command-line fragment; it references $run_time, which cherri_eval.xml does not define, so it is only usable by tools that declare all parameters used here. -->
+    <token name="@COMMONPARAMS@" ><![CDATA[
+            #if $intarna_param_file:
+                -p '$intarna_param_file'
+            #end if
+            -c '$context'
+            -st $use_structure
+            -t '$run_time'
+            -me "\${GALAXY_MEMORY_MB_PER_SLOT:-8000}"
+            -j "\${GALAXY_SLOTS:-1}"
+        ]]></token>
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0080</edam_topic>
+            <edam_topic>topic_0081</edam_topic>
+            <edam_topic>topic_0160</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_2995</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">cherri</xref>
+        </xrefs>
+    </xml>
+    <!-- Single definition of the requirements macro; <yield/> lets an expanding tool append further requirements. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">cherri</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/context_150_model.tgz
b
Binary file test-data/context_150_model.tgz has changed
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/context_150_st_off.npz
b
Binary file test-data/context_150_st_off.npz has changed
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/evaluation_results_st_on.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/evaluation_results_st_on.csv Fri Dec 09 17:40:27 2022 +0000
b
b'@@ -0,0 +1,6 @@\n+target_ID,query_ID,instance_score,predicted_label,E_hybrid,ED1,ED2,len_interaction_target,len_interaction_query,no_bps,max_inter_len,inter_len_normby_bp,bp_normby_inter_len,sum_ED,mfe_normby_GC,max_ED_normby_GC,mfe_normby_len,max_ED_normby_len,max_ED_normby_GC_len,complex_target_site,complex_query_site,103,218,3965,4390,4479,5431,5795,6434,6626,6635,7552,7808,8077,8174,11632,14000,15997,18340,19158,20963,21262,21694,27390,29287,29554,32863,33178,33547,34282,34422,34488,37176,37477,37532,38032,38039,38210,38612,38772,39174,40128,40201,40431,40441,41157,41484,42542,42839,43658,44532,45157,46069,46829,47317,49554,49956,52245,52435,53754,54604,56416,56450,56861,57646,59007,59023,59235,60144,60775,61153,61593,62084,62119,62321,63264,63533,63886,64400,66975,68842,69673,70066,70539,70993,71500,72034,72770,72893,76620,77916,79250,81006,83486,83553,84211,84686,85177,88601,88775,89371,89540,89800,91132,96396,96533,99879,100150,100319,101025,102113,102117,102491,104186,104635,105777,106659,107362,107877,110927,111135,111913,112240,113099,113123,115953,116421,116995,117587,118534,118785,119888,119955,120357,121948,123283,123341,125626,125838,126094,126867,128188,128428,130250,130651,131729,131834,133411,133417,134749,135445,135610,136332,136664,137596,139199,140424,141658,143218,143223,144347,144588,146175,146217,148182,148717,148840,148968,150801,150956,151321,152152,152399,152543,153917,154459,154546,154657,155274,156328,156681,157727,158027,158053,158868,159127,159265,162328,162485,163775,165891,166307,166970,167231,167372,167583,167628,167961,167971,168188,168851,169312,170207,170500,170699,172282,172357,173965,174219,174727,174947,179377,181689,181814,182942,183668,184345,184806,185260,185297,185808,186013,186285,186857,186871,187824,187840,189082,192986,193099,193860,194876,195156,195170,195325,195449,196564,197600,197675,199001,199931,200065,200863,201409,203349,204997,206423,206678,207027,207715,210220,210871,212552,212616,214127,214790,215521,215789
,216063,217850,217874,218567,219482,220128,220256,220427,221245,222067,222931,223613,223729,224656,225488,226419,227603,229088,229857,230101,231806,231841,231882,232458,233010,234231,234297,234449,234564,235569,237392,239574,241915,242858,246680,247158,247200,248248,249295,250145,251764,251957,252112,252166,252543,252640,254243,255793,257183,257455,258465,258994,259160,259568,259702,261161\n+chr2;+;471;520,chr1;+;471;494,0.986328125,1,-37.81,13.81,14.42,28.0,26.0,22.0,28.0,1.2727272727272727,0.7857142857142857,28.23,-12.617560975609756,18.99219512195122,-0.3421428571428571,0.515,0.2927368421052631,0.8276981148050341,0.89515469985041,0.0,0.0,0.012033098262942524,0.1453102558740324,0.03899835921291347,0.012033098262942524,0.03865871097078121,0.0,0.0,0.0,0.02410794191116372,0.021589468902926984,0.0,0.011459299381339733,0.0,0.0,0.0,0.019329355485390606,0.0,0.0,0.0,0.0,0.06875579628803839,0.0,0.012033098262942524,0.0,0.0,0.0,0.0,0.07731742194156242,0.0,0.021589468902926984,0.0,0.06817197287688598,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05798806645617181,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1321611062215401,0.0,0.0,0.0,0.015149327305974661,0.04317893780585397,0.0,0.011396882813649601,0.017332604094628206,0.02741755834653258,0.0,0.04273831055118601,0.03865871097078121,0.0,0.04317893780585397,0.0,0.022793765627299203,0.022918598762679467,0.0,0.019052740945631412,0.11966726954332081,0.0,0.0,0.008666302047314103,0.0170953242204744,0.14732713480433976,0.02406619652588505,0.022918598762679467,0.0,0.0,0.028492207034124,0.0,0.0,0.0,0.019329355485390606,0.0,0.0,0.0,0.0,0.0,0.02963759702653775,0.0,0.0,0.0,0.011396882813649601,0.0,0.0,0.0,0.0,0.0,0.09089596383584797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017332604094628206,0.0,0.0,0.015149327305974661,0.0,0.0,0.008666302047314103,0.0,0.0,0.0,0.0,0.0,0.03466520818925641,0.0,0.0,0.0,0.13336918661941988,0.05798806645617181,0.008666302047314103,0.0,0.008666302047314103,0.019052740945631412,0.016935769729450142,0.0,0.0227239909
589'..b'0.0,0.022414815323599863,0.0,0.0,0.025592820752959072,0.0,0.0,0.0,0.0,0.0,0.07375151068512625,0.0,0.0,0.0,0.038464825356495314,0.0,0.0,0.0,0.0,0.0,0.012821608452165106,0.0,0.0,0.0,0.0,0.03838923112943861,0.012821608452165106,0.0,0.0,0.0,0.12440772281977645,0.03687575534256313,0.012821608452165106,0.0,0.012821608452165106,0.058544810738718325,0.021954304027019374,0.0,0.0,0.0,0.0,0.0,0.03362222298539979,0.0,0.0,0.11938301635704579,0.0,0.0,0.0,0.0,0.0,0.13904392550445605,0.0,0.0,0.0,0.0,0.038464825356495314,0.0,0.0,0.0,0.0,0.0,0.03687575534256313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04317552468775519,0.0,0.0,0.0,0.03245772149499368,0.0,0.0,0.0,0.0,0.03687575534256313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029272405369359163,0.0,0.03687575534256313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02564321690433021,0.0,0.0,0.0,0.0,0.03687575534256313,0.11938301635704579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029272405369359163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04482963064719973,0.0,0.0,0.0,0.0,0.0,0.0,0.028783683125170125,0.0,0.0,0.0,0.03245772149499368,0.0,0.028409853192133627,0.0,0.0,0.0,0.0,0.0,0.0,0.08965926129439945,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12171645560622629,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09948584696420483\n+chr2;+;1782;1804,chr1;+;1796;1816,0.951171875,1,-25.59,8.3,5.24,11.0,11.0,11.0,11.0,1.0,1.0,13.54,-13.952631578947368,9.610526315789476,-1.0954545454545457,0.7545454545454546,0.4048780487804878,0.7455574250275929,0.6203352658383443,0.02111001654603745,0.0,0.034862044947607744,0.14619433492165843,0.0519830880014004,0.017431022473803872,0.0,0.0,0.0,0.0,0.0,0.014549074861400571,0.0,0.0,0.027495167063805316,0.0,0.0,0.027495167063805316,0.0,0.0,0.0,0.0,0.07697043394160114,0.0,0.0,0.0,0.0,0.017431022473803872,0.0,0.05499033412761063,0.0,0.0,0.0,0.024361505139877827,0.0,0.0,0.0,0.0,0.0,0.
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019242608485400284,0.05499033412761063,0.0,0.017431022473803872,0.0,0.0,0.0,0.0,0.0,0.0,0.129957720003501,0.0,0.0,0.0,0.0,0.014549074861400571,0.0,0.0,0.008663848000233398,0.03848521697080057,0.0,0.01624381499129538,0.0,0.0,0.014549074861400571,0.0,0.03248762998259076,0.0,0.0,0.010153263325828915,0.12995051993036305,0.0,0.0,0.008663848000233398,0.010829209994196922,0.14728541600396777,0.0,0.019242608485400284,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0281741623204647,0.0,0.0,0.0,0.0,0.0,0.0,0.027495167063805316,0.0,0.0,0.0,0.0,0.019242608485400284,0.0,0.06333004963811235,0.0,0.0,0.0,0.05499033412761063,0.0,0.0,0.0,0.008663848000233398,0.0,0.0,0.02111001654603745,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034655392000933594,0.0,0.0,0.0,0.13199242323577592,0.10998066825522126,0.0,0.0,0.0,0.010153263325828915,0.0,0.0,0.0,0.0,0.0,0.0,0.02614653371070581,0.0,0.0,0.13094167375260515,0.0,0.0,0.0,0.0,0.0,0.15229894988743373,0.0,0.014549074861400571,0.0,0.0,0.017327696000466797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0844400661841498,0.0,0.0,0.06333004963811235,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06333004963811235,0.0,0.0,0.0,0.010829209994196922,0.0,0.0,0.03637268715350144,0.0,0.027495167063805316,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06972408989521549,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05499033412761063,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019242608485400284,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02111001654603745,0.0,0.0,0.0,0.0,0.0422200330920749,0.0,0.0,0.0,0.0,0.0,0.0,0.008663848000233398,0.010829209994196922,0.0,0.0,0.0,0.0,0.13094167375260515,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010829209994196922,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010153263325828915,0.0,0.0,0.014549074861400571,0.0,0.0,0.0,0.017431022473803872,0.06972408989521549,0.0,0.0,0.0,0.0,0.0,0.0,0.02111001654603745,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.067349129698901,0.0,0.0,0.0,0.0,0.06972408989521549,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12995051993036305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1163925988
9120457\n'
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,4 @@
+>chr1
+GGCUCAGGGAAGGACGACAGCUCCCGGCUCAGUCCUCAGCCUCCAACCCCCACUCUUCAAUCUCCUUCCCAGACCAAUGACCCCCUCUGCGGGAGAACCUGCGGCGAAAGGCAGAGCCCCAGAGCAGGGUCUGGAGUGGCAGGACCGGCAGCCAAUGAAGGUGAAGGCCGAGCAUUACAUCAUCACCGUGGUCUCCCAUUGGUUUACAUCCUGGGCUUCCUGACUCCGCCUCCCAGCCUUAACAAUGGGCCUCUCUGCUCCCCAGGCUUCUGACCCUGGUGCCAGCUUUGCCCAGAAGGCCACAGUCGGGCAGAGUCCUGGACUUUUGCAAAGCAAACGUGCAACCCCAAGCAGCGGUCUCCCGGGCCGGGGCUGCGCGGCCGCCGCUGUGCUGGCUUUUAACGGUGGGAGGGCACCAUCCUCUUGCUCUGCUCUCGUUCUCCAGAAGGCUGUCCCGGGGCCCCCACUCUCCGUCCCGCUCCGGGGACAGUGGCUCGCCUGCUAUGCGCGGCAGCCCGCGCCGGGGCCGGCACCAGCAGCGCCCGGGCGGAUGCAGCGAGCCCACGGAGGGGCAUGCUUCCACGCACCAAGUAUAACCGCUUCAGGAAUGACUCGGUGACAUCGGUCGAUGACCUUCUCCACAGCAUUCCCAUCCAAGUCCGGGUGGGCAGGUGGCUCUUGCCCCUUGGAUUGAGGAGCACGGGGGUCAGCCUCACGGCCCAGGGUGGCCGGCGCCAGCUAAUGAGGCUGUGGUCCCGCAGGAGCUGUUCCCCUCGCGUCAGGAGAAGCUCUACCCGGGCUGGGCGCGCGCCGCCUGUGUGCUGCUGUCCUUGCUGCCCGUGCUGUGGGUCCCGGUGGCCGCGCUUGCUCAGCUGCUCACCCGGCGGAGGCGGACGUGGAGGGACAGGGACGCGCGCCCAGACACGGACAUGCGCCCGGACACGGACACGCGCCCAGACACGGACAAGUUCCGUUUAUGGUCUGAUUUCCGGCCUCUCGCCUGCUCGCCCCGCCGCCCGCCUGUCCCGCUCCCUCCCUCCCGGGGACCCGGAGGAGAGGGGACCAUGCCGGAACCCGGGCCGGACGCUGCCGGCACCGCCAGCGCACAGCCCCAACCGCCGCCGCCCCCCCCACCCGCUCCCAAGGAGUCCCCGUUCUCCAUCAAGAACCUGCUCAACGGAGACCACCACCGGCCGCCCCCUAAGCCUCAGCCGCCCCCACGGACGCUCUUCGCGCCAGCCUCGGCUGCCGCCGCCGCCGCCGCUGCCGCUGCCGCGGCGGCCAAGCAGGGCCUGGUGGCGAGAGCGCGGCUGUCACUGCGCCCGAGCAUCCCAGAGCUUUCCGAGCGGACGAGCCGGCCGUGCCGGGCAUCCCCAGCCUCGCUACCCUCGCAGCACACGUCGAGCCCCGCACAGGCGAGGGUCCGGAACUUAGCCCAAAGCACGUUUCCCCUGGCAGCGCAGGAAACGCCCGGCCGCGCGCCGGCGCACGCCCCCCUCUCCUCCUUUGUUCCGGGGGUCGGCGGCCGCUCUCCUGCCAGCGUCGGGAUCUCGGCCCCGGGAGGCGGGCCGUCGGGCGCAGCCGCGAAGAUGCCGUUGGAACUGACGCAGAGCCGAGUGCAGAAGAUCUGGGUGCCCGUGGACCACGGACUACACUUCCCAGGAUGCGUCGGGGGAAGAGGCCACAGGGACAGGCAGAGGGAGGGGGCGGCCCCAGUCCUCCGGGGCGUCGCUGUUUCCAAGCGGCGAGAUCUCGCCUAGUAACCGGCAGCUGAGGUUCACUAGCGCGGCGUUGUGGCCCCGCCCCCGGAGCCCCGGCUGGAGGAAGAACCUGUCUCCCGCGGCGCCCCAGUGACGAAGUCCAUCCCCGGGUGCGGGAGGCGGGGAGGGGAGGGCAGGCAGAGGAUUUCGCUCGCUCGCCCGCGCGGGAUCCGGGACGCGCCGGUGGCGUCCGCUGGCAGCAGAGGCUCAGCCCCGACAAGGCGGCCGAGGUGCUGAGC
CAGGGCGCUCACCUGGCGGCCGGGCCCGACGGCCGGACCAUCGACCGUUUCUCUCCCACCUAGAGCGCCCCUCGCCAGCCCGCUCUGUCGCUGCUGCGCGGCCCUGGCCCGCACCCCAGGGAGCGGCGGGGGCGGCGCGCAGGGCCCACUGUGCCCGGGACAACCGCAGCGUCGCCACAGUGGCGGCUCCACCUCUCGGCGGCCUCACCUGGCCUCACUGCUUCGUGCCUUAGCUCGGGGGUCGGGGGAGAACCCCGGGACGGGGGUGGGAUGGGGUAAGGGAAAUUUAUA
+>chr2
+CAGUCACCACCUCACACAUGAGGAAGCUGAGGCUGGAGACUUAGGUGACCUGGCCUAGAGUUUCAGGGCGCCGGGAUCCUGUCGAAGGUCCCACCCACCUGGCUCUGCUGCAGCGCCCUCUCUCUGUCCUCUAGGUGUGUUUGGUGGCCGGGGCCGAGGUGGGAUCCCGGGCACAGGCAGAGGCCAGCCAGAGAAGAAGCCUGGCAGACAGGCGGGCAAACAGUGAGCGCCCACCCAGACCGGCUGCUGCGCCCCCUCCUGCCAGGGUGGCGAUUCCGCUCCACAGUCUCGGACGGAUCUGCUCAGAAAGGAAGAGGCAGGUCUGGCACGGUGAAGAGACGUGAGAGGUGUAGAAUAAGUGGGAGGCCCCCGGCGCCCCCCGGUGUCCCCGCGAGGGGUCCAGGGCGGGGUCUGCCGGCCCUGCGGGCCGCCGGUGAAAUACCACUACUCUGAUCGUUUUUUCACUGACCCGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGCGCCCGGCCGCACGCAAAAGAGCUCCUGAAGGAAGCGCUAAACAUGGAAAGGAACAACGGGUACCAGCCGCUGCAAAAUCAUGCCAAAAUGUAAAGACCAUCGAGACUAGGAAGAAACUGCAUCAACUAACGAGCAAAAUCACCAGCUAAAGUUCCGUUUAUGGUCUGAUUUCCGGCCUCUCGCCUGCUCGCCCCGCCGCCCGCCUGUCCCGCUCCCUCCCUCCCGGGGACCCGGAGGAGAGGGGACCAUGCCGGAACCCGGGCCGGACGCUGCCGGCACCGCCAGCGCACAGCCCCAACCGCCGCCGCCCCCCCCACCCGCUCCCAAGGAGUCCCCGUUCUCCAUCAAGAACCUGCUCAACGGAGACCACCACCGGCCGCCCCCUAAGCCUCAGCCGCCCCCACGGACGCUCUUCGCGCCAGCCUCGGCUGCCGCCGCCGCCGCCGCUGCCGCUGCCGCGGCGGCCAAGCAGGCAUGCGCUGCCUAGCCCCGUGUGGUUAACAUGUCCGUAUUUAAAGAACACACAAUUAAUUGCCUAAAAGCAUUAUCCUUCUCAGCACUCUCCUAUCCUUCAGAUCUAACCUUUAGGCACUGCCGGCUGCCGGCGGGGUCCAGGACCCAGCGGGGCUGGGCGCGCGGAGCAGCGCUGGGUGCAGCGCCUGCGCCGGCAGCUGCAAGGGCCGCAGAAAUUAAACGCGAGUGUCCAGAAUCGGGCAUUGUGGGUUUAAAUCCUGAAUCCACUGGUCACUAUCUGAGACUUUCCCCUGUCACAAAUUAGUUACCUUCUUACUUUUGCAAAGCAAACGUGCAACCCCAAGCAGCGGUCUCCCGGGCCGGGGCUGCGCGGCCGCCGCUGUGCUGGCUUUUAACGGUGGGAGGGCACCAUCCUCUUGCUCUGCUCUCGUUCUCCAGAAGGCUGUCCCGGGGCCCCCACUCUCCGUCCCGCUCCGGGGACAGUGGCUCGCCUGCUAUGCGCGGCAGCCCGCGCCGGGGCCGGCACCAGCAGCGCCCGGGCGGAUGCAGCGAGCCCACGGAGGGGCAUGCUUCCACGCACCAAGUAUAACCGCUUCAGGAAUGACUCGGUGACAUCGGUCGAUGACCUUCUCCACAGGCCUUGGCCCCGCCCCCGCCGCCGCGCAGGCGCUGACGCAAGCGCAGCAGGCGCGCGCUGUUUCCGGAAGUCGCGGCCGGCGUCACCGCUGCGGCUGCCUCAGCUACUGCCGCAGUCGCCGCGGAAUUCGGCGAGUAGAACCGCUGAGGCGGGCGCGGGCCCGGGUGGGGCCAAGGUUCCGGCCACUCUGCAGAAUGGAGAUAAUCAGGAGCAGUGCGUGUCCGCCGUACACCUCCCCGCCUCCCCACACCCGGCUGCCCCUGGCCUGCUAGGCCGAGACCAUGGCCGGCGUGUUUGGCUGGGCCUGGGGCUGAGGCAGCCCUGUUGUGUAUCACUCACUGAGGCUGGACAAGAGGGAAGGCGGAGA
AGCCUGGCCACAUGUCUCCUGAGGGCUCCAGGCAGGGCCCUCUCACCUGCUGCCAGGGUCCCAGCCCGCAGGAGCUUCCCGUCCACCUCUGAACUCACGGUCCACAUGGCGCUGGAGCGUCGGGCACCAUCUACAGGGCUUGAGGCCAGCAGCCUAGCCUCUGGGUCCACUGGGGCAGGCAAGGUGAGUCCUGCUGUCUGUGGAGCCAGCGUGCUGGGCCGGGCAGGGGGCCUCGUUGGCUGGGGUGGCUGCAGCUCCGCCCUGCCUGGGGCAUU
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/genome.fa.fai
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.fa.fai Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,2 @@
+chr1 2290 6 2290 2291
+chr2 2274 2303 2274 2275
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/genome.sizes
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome.sizes Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,2 @@
+chr1 2292
+chr2 2276
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/mixed_context_150_st_off.npz
b
Binary file test-data/mixed_context_150_st_off.npz has changed
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/test_evaluate_rris.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_evaluate_rris.csv Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,8 @@
+chrom1,start1,stop1,strand1,chrom2,start2,stop2,strand2
+chr2,150,171,+,chr1,150,171,+
+chr2,471,520,+,chr1,471,494,+
+chr2,820,840,+,chr1,794,816,+
+chr2,1140,1159,+,chr1,1116,1136,+
+chr2,1459,1482,+,chr1,1436,1496,+
+chr2,1782,1804,+,chr1,1796,1816,+
+chr2,2104,2124,+,chr1,2116,2140,+
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_1.fa Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,4 @@
+>chr1
+CCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCGCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCGGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGCCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACGCGCGUGCGCCCGAGCGCGGCCCGGUGGUCCCUCCCGGACAGGCGUUCGUGCGACGUGUGCGCGCGUGCGCCCGAGCGCGGCCCGGUGGUCCCUCCCGGACAGGCGUUCGUGCGACGUGUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGANANANANACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAAGGGAGAAGGGUCGGGGCAGGGCCCCGGGGAGCCCGGCGGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGANACCCCGGGGAGCCCGGCGGGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGAGGGAGAAGGGUCGGGGCAGGGGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGACCCCGGGGAGCCCGGCGGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGCGGAUCCGAGUCACGGCACCAAACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGGUAGGGGGCGGGCUCCGGCGCUGGUAGGGGGCGGGCUCCGGCGCUGGUAGGGGGCGGGCUCCGGCGCUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCA
+>chr2
+GUUGCGGCUGGACGAGGCGCAGUGGCCGCGGCGCGAGCCGGGGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCGCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCGCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCACUGGUGAAGAGACAUGAGAGGAUGGGCGCGCGCCGGGCCCGUGCCGUUCCCUCCUCCUCCUCCCCUGCCGCGGCGAGCCGGGCCCUGGGGGCCCUUCCCGUGCGCGGGCGAGCCGGGCCCGAUGCCUUCCCCGUCCGUCCGUCCGUCCGUCCAUCCGUCCACCCCCGGGCCGGGCACCGCCCCCGGGCCGGGCACCGCGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGNANANANAGGACGGGCCCUUCCCGUGGCGCUGCGCGAGCCGGGCGCUGCGGCUGGAUGAGGCGCUCGCCCUCUCUCUCUCUCUCUCUGUCUCUCUCUCUCUCUGGAGCCGGGGGGGGGGGGGGGGGGGGGGGGGGGCAGGCGGCGCCGCCGCCCUCUCCCACCCCAGAAAAGGUGUUGGUUGAUAUAGACAGCAGGACGGUGGCCAUGGAAGUCAGAAUCCACUAAGGAGUGUGUAACAACUCACCUGCCGAAUCAACUAGCCCUGAAAAUGGAUGGCACUGGAGCAUCAGGCCCAUACCCGGCCAUCGCCAGCAGUCCGGCCGCCGCGGCGCGCGCCCGGGUCCCCCGCCGCCCCUCCGGUGCUCCCUCCCCACCCCGCAGUGAUGCCUUCCCCGUCCGUCCGUCCGUCCGUCCAUCCGUCCACGCCUCCCUCCCCACCCCGCGAGCCUCCCUCCCCACCCCGCGAGCCUCCCUCCCCACCCCGCGAGCCUCCCUCCCCACCCCGCGAGCCUCCCUCCCCACCCCGCGAAUACUUACCUGGCAGGGGAGAUACCAUGAUCACGAAGGUGGUUUUCCCAGGGCGAGGCUUAUCCAUUGCACUC
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_1_len.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_1_len.tabular Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,2 @@
+chr1 1888
+chr2 1652
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_1_pos.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_1_pos.tabular Fri Dec 09 17:40:27 2022 +0000
b
b'@@ -0,0 +1,25 @@\n+383\tchr1\t0\t23\t+\tchr2\t1\t24\t-\tGUUGCGGCUGGACGAGGCGCAGU\tCCCAGCGGGGCUGGGCGCGCGGA\t.((((((((.(.(....(((...&....)))..).).))).))))).\t-14.5\tUUGCGGCUGGACGAGGCGC\tGCGGGGCUGGGCGCGCGG\t2&5\tchr1\t1\t19\t+\tchr2\t5\t19\t-\t489.80\t3308.00\t3797.8\t1.0\t1.0\t1.0\tCDS\tmiRNA\tENST00000624358;ENST00000651965\tENST00000584178\n+164\tchr1\t23\t42\t-\tchr2\t24\t47\t-\tGGCCGCGGCGCGAGCCGGG\tCCCAGCGGGGCUGGGCGCGCGGA\t..(((((((.(.(((((((&))).....)))).))).))))).\t-23.9\tCCGCGGCGCGAGCCGGG\tCCCAGCGGGGCUGGGCGCGCGG\t3&1\tchr1\t25\t40\t-\tchr2\t24\t46\t-\t219.80\t3308.00\t3527.8\t1.0\t1.0\t1.0\tprotein_coding\tmiRNA\tENST00000257765;ENST00000423730;ENST00000433730;ENST00000610435\tENST00000584178\n+141\tchr1\t42\t90\t+\tchr2\t47\t97\t+\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCG\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\t(((((((.(((((.((.((((((((((.((((((((((((((......&)))))).))).)))))..)))))))))))).)))).).))))))).....\t-74.1\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCG\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCC\t1&1\tchr1\t42\t84\t+\tchr2\t47\t92\t+\t84.07\t73.88\t157.95\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000581792\n+141\tchr1\t90\t140\t+\tchr2\t97\t145\t+\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCG\t((((((.(((.(((((..((((((((((((.((((.(.(((((((.....&))))))).))))).)).)))))))))).))))))))))))))......\t-74.1\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCC\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCG\t1&1\tchr1\t90\t135\t+\tchr2\t97\t139\t+\t73.88\t84.07\t157.95\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000615959\n+141\tchr1\t140\t188\t+\tchr2\t145\t195\t+\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCG\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\t(((((((.(((((.((.((((((((((.((((((((((((((......&)))))).))).)))))..)))))))))))).)))).).))))))).....\t-74.1\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCG\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCC\t1&1\tchr1\t1
40\t182\t+\tchr2\t145\t190\t+\t84.07\t73.88\t157.95\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000615959\n+141\tchr1\t188\t238\t+\tchr2\t195\t243\t+\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCG\t((((((.(((.(((((..((((((((((((.((((.(.(((((((.....&))))))).))))).)).)))))))))).))))))))))))))......\t-74.1\tCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCC\tGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCG\t1&1\tchr1\t188\t233\t+\tchr2\t195\t237\t+\t73.88\t84.07\t157.95\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000581792\n+134\tchr1\t238\t263\t-\tchr2\t243\t292\t+\tACUGGUGAAGAGACAUGAGAGGAUG\tGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAG\t..(((((.((((...((((((....&.............................)))))).))))..)))))..\t-17\tUGGUGAAGAGACAUGAGAG\tCUCUCGCUUCUGGCGCCA\t3&30\tchr1\t240\t257\t-\tchr2\t272\t261\t+\t2216.00\t1579.00\t3795.0\t1.0\t1.0\t1.0\tlncRNA\tmiRNA\tENST00000667915\tENST00000611066\n+131\tchr1\t263\t285\t+\tchr2\t292\t315\t-\tGGCGCGCGCCGGGCCCGUGCCG\tCCCAGCGGGGCUGGGCGCGCGGA\t..((((((((.(((((.(((..&....)))))))).))))))))..\t-35.6\tCGCGCGCCGGGCCCGUGC\tGCGGGGCUGGGCGCGCG\t3&5\tchr1\t265\t281\t+\tchr2\t296\t309\t-\t161.60\t3308.00\t3469.6\t1.0\t0.84\t0.84\tCDS\tmiRNA\tENST00000369800;ENST00000522853\tENST00000584178\n+92\tchr1\t285\t305\t-\tchr2\t315\t338\t+\tUUCCCUCCUCCUCCUCCCCU\tGUAGGGGGCGGGCUCCGGCGCUG\t..(((.((((((.(......&).)))))).)))...........\t-15.3\tCCCUCCUCCUCC\tGUAGGGGGCGGG\t3&1\tchr1\t287\t297\t-\tchr2\t315\t327\t+\t124.00\t5122.00\t5246.0\t1.0\t1.0\t1.0\tlncRNA\tmiRNA\tENST00000607278\tENST00000577388\n+75\tchr1\t305\t323\t-\tchr2\t338\t361\t-\tGCCGCGGCGAGCCGGGCC\tCCCAGCGGGGCUGGGCGCGCGGA\t.(((((.(..(((.((((&........)))).))))))))).\t-22.4\tCCGCGGCGAGCCGGGCC\tGGCUGGGCGCGCGG\t2&9\tchr1\t306\t322\t-\tchr2\t346\t352\t-\t104.40\t3308.00\t3412.4\t1.0\t1.0\t1.0\tprotein_coding\tmiRNA\tENST00000371984\tENST00000584178\n+72\tchr1\t323\t341\t+\tchr2\t361\t384\t-\tCUGGGGGCCCUUCCCGUG\tCCCAGCGGGGCUGGGCGC
GCGGA\t(((.(.((((.((((((.&....))))))..)))).).))).\t-25.7\tCUGGGGGCCCUUCCCGU\tGCGGGGCUGGGCGCGCGG\t1&5\tchr1\t323\t340\t+\tchr2\t365\t379\t-\t99.15\t3308.00\t3407.15\t1.0\t0.84\t0.84\tlncRNA\tmiRNA\tENST00000609544\tENST00000584178\n+66\tchr1\t341\t359\t+\tchr2\t384\t407\t-\tCGCGGGCGAGCCGGGCCC\tCCCAGCGGGGCUGGGCGCGCGGA\t.....(((.(((.(((((&.......))))).))).)))...\t-23.6\tGCGAGCCGGGCCC\tGGGCU'..b'GG\t1&4\tchr1\t401\t419\t+\tchr2\t433\t456\t+\t88.57\t68.87\t157.44\t1.0\t1.0\t1.0\tprotein_coding\tmiRNA\tENST00000343605\tENST00000614492\n+51\tchr1\t419\t437\t+\tchr2\t491\t552\t+\tCCCCGGGCCGGGCACCGC\tCGCGCGUGCGCCCGAGCGCGGCCCGGUGGUCCCUCCCGGACAGGCGUUCGUGCGACGUGUG\t(((((((((((((.((((&...))).).))))......))))))).))................................\t-29.9\tCCCCGGGCCGGGCACCGC\tGCGUGCGCCCGAGCGCGGCCCGGUGG\t1&4\tchr1\t419\t437\t+\tchr2\t494\t517\t+\t88.57\t68.87\t157.44\t1.0\t1.0\t1.0\tprotein_coding\tmiRNA\tENST00000343605\tENST00000577708\n+37\tchr1\t437\t590\t+\tchr2\t552\t575\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t540\t466\t+\tchr2\t552\t573\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000612463\tENST00000584178\n+37\tchr1\t590\t743\t+\tchr2\t575\t598\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t1
04&1\tchr1\t693\t619\t+\tchr2\t575\t596\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000610460\tENST00000584178\n+37\tchr1\t743\t896\t+\tchr2\t598\t621\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t846\t772\t+\tchr2\t598\t619\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000613359\tENST00000584178\n+37\tchr1\t896\t1049\t+\tchr2\t621\t644\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t999\t925\t+\tchr2\t621\t642\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000619471\tENST00000584178\n+37\tchr1\t1049\t1202\t+\tchr2\t644\t667\t-\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tchr1\t1049\t1202\t+\tchr2\t644\t667\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000611446\tENST00000584178\n+37\tchr1\t1202\t1355\t+\tchr2\t667\t690\t-\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tchr1\t1202\t1355\t+\tchr2\t667\t690\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000619779\tENST00000584178\n+37\tchr1\t1355\t1508\t+\tchr2\t690\t713\t-\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tchr1\t1355\t1508\t+\tchr2\t690\t713\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000616292\tENST00000584178\n+37\tchr1\t1508\t1661\t+\tchr2\t713\t736\t-\tNA\
tNA\tNA\tNA\tNA\tNA\tNA\tchr1\t1508\t1661\t+\tchr2\t713\t736\t-\t10440.00\t3308.00\t13748.0\t1.0\t0.84\t0.84\trRNA\tmiRNA\tENST00000618998\tENST00000584178\n+36\tchr1\t1661\t1679\t+\tchr2\t736\t759\t-\tGGACGGGCCCUUCCCGUG\tCCCAGCGGGGCUGGGCGCGCGGA\t.(.((.((((.((((((.&....))))))..)))).)))...\t-22.5\tGACGGGCCCUUCCCGU\tGCGGGGCUGGGCGCGC\t2&5\tchr1\t1662\t1677\t+\tchr2\t740\t752\t-\t50.24\t3308.00\t3358.24\t1.0\t0.84\t0.84\tCDS\tmiRNA\tENST00000305943\tENST00000584178\n+32\tchr1\t1679\t1697\t-\tchr2\t759\t782\t-\tGCGCUGCGCGAGCCGGGC\tCCCAGCGGGGCUGGGCGCGCGGA\t...((((((..(((.(((&.........))).))))))))).\t-20.7\tCUGCGCGAGCCGGGC\tGCUGGGCGCGCGG\t4&10\tchr1\t1682\t1694\t-\tchr2\t768\t772\t-\t43.63\t3308.00\t3351.63\t1.0\t1.0\t1.0\tCDS\tmiRNA\tENST00000269503;ENST00000585159\tENST00000584178\n'
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_2.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_2.fa Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,4 @@
+>chr1
+CCCAGCGGGGCUGGGCGCGCGGAGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCCCCAGCGGGGCUGGGCGCGCGGACGCGCGUGCGCCCGAGCGCGGCCCGGUGGUCCCUCCCGGACAGGCGUUCGUGCGACGCGCGUGCGCCCGAGCGCGGCCCGGUGGUCCCUCCCGGACAGGCGUUCGUGCGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCGCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAUCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCGCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAUNANANANACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGUAGGGGGCGGGCUCCGGCGCGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAAGGGAGAAGGGUCGGGGCAGNANACCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGCCCCGGGGAGCCCGGCGGNANANANAGUAGGGGGCGGGCUCCGGCGCCCCAGCGGGGCUGGGCGCGCGGACCCAGCGGGGCUGGGCGCGCGGAGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAGCCCAGCGGGGCUGGGCGCGCGGACC
+>chr2
+GUUGCGGCUGGACGAGGCGCAGCUGGUGAAGAGACAUGAGAGGAUGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUGAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCGGCUGCGGCUGGAUGAGGCGGGGCCGCGGCGCGAGCCGGGCUGGGGGCCCUUCCCGUGGUUCCCUCCUCCUCCUCCCCUGGCGCGCGCCGGGCCCGUGCCGCCCCGGGCCGGGCACCGCCCCCGGGCCGGGCACCGCGCCGCGGCGAGCCGGGCCGGACGGGCCCUUCCCGUGUGCCUUCCCCGUCCGUCCGUCCGUCCGUCCAUCCGUCCACCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAUUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCGCGCGACUGCGGCGGCGGUGGUGG
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_2_len.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_2_len.tabular Fri Dec 09 17:40:27 2022 +0000
b
@@ -0,0 +1,2 @@
+chr1 1416
+chr2 1180
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/train_2_pos.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/train_2_pos.tabular Fri Dec 09 17:40:27 2022 +0000
b
b'@@ -0,0 +1,25 @@\n+386\tchr1\t0\t22\t+\tchr2\t1\t24\t-\tGUUGCGGCUGGACGAGGCGCAG\tCCCAGCGGGGCUGGGCGCGCGGA\t.((((((((.(.(....(((..&....)))..).).))).))))).\t-14.5\tUUGCGGCUGGACGAGGCGC\tGCGGGGCUGGGCGCGCGG\t2&5\tchr1\t1\t19\t+\tchr2\t5\t19\t-\t527.40\t2400.00\t2927.4\t1.0\t1.0\t1.0\tCDS\tmiRNA\tENST00000624358;ENST00000651965\tENST00000584178\n+156\tchr1\t22\t45\t-\tchr2\t24\t73\t+\tCUGGUGAAGAGACAUGAGAGGAU\tGGUGAGGCGGGGGGGCGAGCCCUGAGGGGCUCUCGCUUCUGGCGCCAAG\t.(((((.((((...((((((...&.............................)))))).))))..)))))..\t-17\tUGGUGAAGAGACAUGAGAG\tCUCUCGCUUCUGGCGCCA\t2&30\tchr1\t23\t41\t-\tchr2\t53\t42\t+\t2419.00\t1304.00\t3723.0\t1.0\t1.0\t1.0\tlncRNA\tmiRNA\tENST00000667915\tENST00000611066\n+102\tchr1\t45\t95\t+\tchr2\t73\t130\t+\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUG\tAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCG\t((((((((((((.(((((.((.((((((((((.(((((((((((((((..&.....))))))).))).)))))..)))))))))))).)))).).)))))))))))).\t-90.1\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGG\tCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\t1&6\tchr1\t45\t93\t+\tchr2\t78\t124\t+\t54.44\t50.81\t105.25\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000581792\n+102\tchr1\t95\t152\t+\tchr2\t130\t180\t+\tAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCG\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUG\t.....(((((((.(((.(((((..((((((((((((.((((.(.((((((((((((.&)))))))))))).))))).)).)))))))))).)))))))))))))))..\t-90.1\tCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGG\t6&1\tchr1\t100\t146\t+\tchr2\t130\t178\t+\t50.81\t54.44\t105.25\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000615959\n+102\tchr1\t152\t202\t+\tchr2\t180\t237\t+\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUG\tAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCG\t((((((((((((.(((((.((.((((((((((.(((((((((((((((..&.....))))))).))).)))))..)))))))))))).)))).).)))))))))))).\t-90.1\tGCCGCGGGGAUCGCCGAGGG
CCGGUCGGCCGCCCCGGGUGCCGCGCGG\tCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\t1&6\tchr1\t152\t200\t+\tchr2\t185\t231\t+\t54.44\t50.81\t105.25\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000615959\n+102\tchr1\t202\t259\t+\tchr2\t237\t287\t+\tAGGCCCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGCG\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGGUG\t.....(((((((.(((.(((((..((((((((((((.((((.(.((((((((((((.&)))))))))))).))))).)).)))))))))).)))))))))))))))..\t-90.1\tCCGCGCGUGUGUCCCGGCUGCGGUCGGCCGCGCUCGAGGGGUCCCCGUGGC\tGCCGCGGGGAUCGCCGAGGGCCGGUCGGCCGCCCCGGGUGCCGCGCGG\t6&1\tchr1\t207\t253\t+\tchr2\t237\t285\t+\t50.81\t54.44\t105.25\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000581792\n+86\tchr1\t259\t280\t-\tchr2\t287\t310\t-\tGCUGCGGCUGGAUGAGGCGGG\tCCCAGCGGGGCUGGGCGCGCGGA\t.((((((((.(.....(((((&))).)).....).))).))))).\t-18.6\tCUGCGGCUGGAUGAGGCGGG\tCCCAGCGGGGCUGGGCGCGCGG\t2&1\tchr1\t260\t279\t-\tchr2\t287\t309\t-\t124.00\t2400.00\t2524.0\t1.0\t1.0\t1.0\tCDS\tmiRNA\tENST00000338754;ENST00000398110;ENST00000403880;ENST00000442495\tENST00000584178\n+65\tchr1\t280\t298\t-\tchr2\t310\t333\t-\tGCCGCGGCGCGAGCCGGG\tCCCAGCGGGGCUGGGCGCGCGGA\t.(((((((.(.(((((((&))).....)))).))).))))).\t-23.9\tCCGCGGCGCGAGCCGGG\tCCCAGCGGGGCUGGGCGCGCGG\t2&1\tchr1\t281\t297\t-\tchr2\t310\t332\t-\t89.73\t2400.00\t2489.73\t1.0\t1.0\t1.0\tprotein_coding\tmiRNA\tENST00000257765;ENST00000423730;ENST00000433730;ENST00000610435\tENST00000584178\n+45\tchr1\t298\t316\t+\tchr2\t333\t356\t-\tCUGGGGGCCCUUCCCGUG\tCCCAGCGGGGCUGGGCGCGCGGA\t(((.(.((((.((((((.&....))))))..)))).).))).\t-25.7\tCUGGGGGCCCUUCCCGU\tGCGGGGCUGGGCGCGCGG\t1&5\tchr1\t298\t315\t+\tchr2\t337\t351\t-\t67.94\t2400.00\t2467.94\t1.0\t0.83\t0.83\tlncRNA\tmiRNA\tENST00000609544\tENST00000584178\n+42\tchr1\t316\t337\t-\tchr2\t356\t377\t+\tGUUCCCUCCUCCUCCUCCCCU\tGUAGGGGGCGGGCUCCGGCGC\t((((((.((((((.(......&).)))))).)))....)))..\t-15.4\tGUUCCCUCCUCCUCC\tGUAGGGGGCGGGCUCCGGC\t1&1\tchr1\t316\t331\t-\tchr2\t356\t375\t+\t53.16\t5016.00
\t5069.16\t1.0\t1.0\t1.0\tlncRNA\tmiRNA\tENST00000607278\tENST00000577388\n+40\tchr1\t337\t359\t+\tchr2\t377\t400\t-\tGGCGCGCGCCGGGCCCGUGCCG\tCCCAGCGGGGCUGGGCGCGCGGA\t..((((((((.(((((.(((..&....)))))))).))))))))..\t-35.6\tCGCGCGCCGGGCCCGUGC\tGCGGGGCUGGGCGCGCG\t3&5\tchr1\t339\t355\t+\tchr2\t381\t394\t-\t54.01\t2400.00\t2454.01\t1.0\t0.83\t0.83\tCDS\tmiRNA\tENST00000369800;'..b'NA\tENST00000356861;ENST00000425528;ENST00000450764;ENST00000589149\tENST00000577388\n+20\tchr1\t471\t510\t+\tchr2\t577\t612\t+\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAU\tUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\t(((((..((((((((((.((.((((((.(((........&.))).)))))).)))))))).)))).....)))))\t-49.4\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCC\tGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\t1&2\tchr1\t471\t502\t+\tchr2\t578\t611\t+\t42.67\t25.71\t68.38\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000581792\n+20\tchr1\t510\t545\t+\tchr2\t612\t651\t+\tUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAU\t.(((.((((((.(((((((.(((((.....(((((&)))))..)))))))))).)).)))))).)))........\t-49.4\tGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCC\t2&1\tchr1\t511\t544\t+\tchr2\t612\t643\t+\t25.71\t42.67\t68.38\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000615959\tENST00000615959\n+20\tchr1\t545\t698\t+\tchr2\t651\t674\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t648\t574\t+\tchr2\t651\t672\t-\t10610.00\t2400.00\t13010.0\t1.0\t0.83\t0.83\trRNA\tmiRNA\tENST00000612463\tENST00000584178\n+20\tchr1\t698\t851\t+\tchr2\t674\t697\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCA
UCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t801\t727\t+\tchr2\t674\t695\t-\t10610.00\t2400.00\t13010.0\t1.0\t0.83\t0.83\trRNA\tmiRNA\tENST00000610460\tENST00000584178\n+20\tchr1\t851\t1004\t+\tchr2\t697\t720\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t954\t880\t+\tchr2\t697\t718\t-\t10610.00\t2400.00\t13010.0\t1.0\t0.83\t0.83\trRNA\tmiRNA\tENST00000613359\tENST00000584178\n+20\tchr1\t1004\t1157\t+\tchr2\t720\t743\t-\tGACUCUUAGCGGUGGAUCACUCGGCUCGUGCGUCGAUGAAGAACGCAGCUAGCUGCGAGAAUUAAUGUGAAUUGCAGGACACAUUGAUCAUCGACACUUCGAACGCACUUGCGGCCCCGGGUUCCUCCCGGGGCUACGCCUGUCUGAGCGUCG\tCCCAGCGGGGCUGGGCGCGCGGA\t.......................................................................................................(((.(.(.((((((((..........(((.....................&)))..)))))))).).).)))..\t-26.5\tCGCACUUGCGGCCCCGGGUUCCUCCCGGG\tCCCAGCGGGGCUGGGCGCGCG\t104&1\tchr1\t1107\t1033\t+\tchr2\t720\t741\t-\t10610.00\t2400.00\t13010.0\t1.0\t0.83\t0.83\trRNA\tmiRNA\tENST00000619471\tENST00000584178\n+20\tchr1\t1157\t1196\t+\tchr2\t743\t778\t+\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAU\tUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\t(((((..((((((((((.((.((((((.(((........&.))).)))))).)))))))).)))).....)))))\t-49.4\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCC\tGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\t1&2\tchr1\t1157\t1188\t+\tchr2\t744\t777\t+\t42.67\t
25.71\t68.38\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000615959\n+20\tchr1\t1196\t1231\t+\tchr2\t778\t817\t+\tUGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCCGCGGGGAU\t.(((.((((((.(((((((.(((((.....(((((&)))))..)))))))))).)).)))))).)))........\t-49.4\tGGCGUCCCCUUCCCCGCCGGCCGCCUUUCUCGCG\tCGCGACUGCGGCGGCGGUGGUGGGGGGAGCC\t2&1\tchr1\t1197\t1230\t+\tchr2\t778\t809\t+\t25.71\t42.67\t68.38\t1.0\t1.0\t1.0\tmiRNA\tmiRNA\tENST00000581792\tENST00000581792\n+20\tchr1\t1231\t1384\t+\tchr2\t817\t840\t-\tNA\tNA\tNA\tNA\tNA\tNA\tNA\tchr1\t1231\t1384\t+\tchr2\t817\t840\t-\t10610.00\t2400.00\t13010.0\t1.0\t0.83\t0.83\trRNA\tmiRNA\tENST00000611446\tENST00000584178\n'
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/training_data_context_150_st_on.npz
b
Binary file test-data/training_data_context_150_st_on.npz has changed
b
diff -r 000000000000 -r 7c6c282ecf5a test-data/training_data_mixed_context_100_st_on.npz
b
Binary file test-data/training_data_mixed_context_100_st_on.npz has changed