Repository 'segmentation_fold'
hg clone https://toolshed.g2.bx.psu.edu/repos/yhoogstrate/segmentation_fold

Changeset 4:63df1e23f4ff (2016-07-28)
Previous changeset 3:cd1bba1c66b3 (2016-03-31) Next changeset 5:b7cf9b172cfe (2016-08-03)
Commit message:
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/segmentation_fold_galaxy_wrapper commit 00690c63c51a7f7563f2428c313d7fa75f2657e5-dirty
modified:
segmentation-fold.xml
test-data/SNORD114-4-revised.scan-for-segments.txt
test-data/SNORD118-revised.scan-for-segments.txt
test-data/SNORD13-revised.scan-for-segments.txt
test-data/SNORD48-revised.scan-for-segments.txt
test-data/segments_truncated.2.out.txt
test-data/segments_truncated.out.txt
tool_dependencies.xml
added:
macros.xml
test-data/DBNFile.test_01.in.bam
test-data/DBNFile.test_01.in.dbn
test-data/DBNFile.test_02.in.bed
test-data/DBNFile.test_02.in.dbn
test-data/DBNFile.test_02.out.n.dbn
test-data/DBNFile.test_02.out.o.dbn
test-data/DBNFile.test_03.in.dbn
test-data/DBNFile.test_03.out.l.dbn
test-data/DBNFile.test_03.out.s.dbn
test-data/ExtractBoxedSequences.test_01.in.bed
test-data/ExtractBoxedSequences.test_01.in.fa
test-data/ExtractBoxedSequences.test_01.out.fa
test-data/FindBoxes.genome.fa
test-data/FindBoxes.test_02.bed
utils_add-read-counts.xml
utils_estimate-energy.xml
utils_extract-boxed-sequences.xml
utils_filter-annotated-entries.xml
utils_filter-by-energy.xml
utils_find-boxes.xml
removed:
energy-estimation-utility.xml
scan-for-segments.xml
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff energy-estimation-utility.xml
--- a/energy-estimation-utility.xml Thu Mar 31 04:26:12 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,111 +0,0 @@
-<tool id="energy_estimation_utility" name="energy-estimation-utility" version="1.6.3-1">
-    <description>Estimate the maximal energy a segment needs to assign to become part of the optimal structure using segmentation-fold</description>
-    
-    <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
-        <requirement type="package" version="2.7.10">python</requirement>
-    </requirements>
-    
-    <stdio></stdio>
-    
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
-    
-    <command><![CDATA[
-        energy-estimation-utility
-            #if $parameters.use_custom_xml == "true"
-                -x "${parameters.input_xml}"
-            #else
-                -x "\$SEGMENTATION_FOLD_DEFAULT_XML"
-            #end if
-            
-            -r ${randomization.shuffle_n_times}
-        > "${output_list}"
-    ]]></command>
-    
-    <inputs>
-        <conditional name="parameters">
-            <param name="use_custom_xml"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Use segment definition from history" />
-
-            <when value="false" />
-            <when value="true">
-                <param name="input_xml"
-                    type="data"
-                    format="xml"
-                    multiple="false"
-                    argument="-x"
-                    label="Custom 'segments.xml'-syntaxed file" />
-            </when>
-        </conditional>
-        
-        <conditional name="randomization">
-            <param name="do_randomization"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Randomly shuffle the sequence(s) instead"
-                help="This can be helpful in determining a baseline of observing an energy parameter by chance" />
-
-            <when value="false">
-                <param name="shuffle_n_times"
-                       type="hidden"
-                       value="0"/>
-            </when>
-            <when value="true">
-                <param name="shuffle_n_times"
-                       type="integer"
-                       min="0"
-                       value="10" 
-                       argument="-r"
-                       label="Number of times the sequences have to be shuffled and energy parameters have to be estimated on" />
-            </when>
-        </conditional>
-    </inputs>
-    
-    <outputs>
-        <data format="text" name="output_list" label="${tool.name}" />
-    </outputs>
-    
-    <tests>
-        <test>
-            <param  name="use_custom_xml" value="true" />
-            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
-            <param  name="do_randomization" value="false" />
-            <param  name="shuffle_n_times" value="0" />
-            
-            <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" /><!-- Accept rounding errors by diff CPU's etc. -->
-        </test>
-        <test>
-            <param  name="use_custom_xml" value="true" />
-            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
-            <param  name="do_randomization" value="false" />
-            <param  name="shuffle_n_times" value="1" />
-            
-            <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" />
-        </test>
-    </tests>
-    
-    <help><![CDATA[
-The tool uses the sequences from the xml file.
-    ]]></help>
-    
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
-</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,44 @@
+<macros>
+    <token name="@VERSION@">smf-v1.6-5_utils-v2.0.1</token>
+
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+        </stdio>
+    </xml>
+
+    <token name="@VERSION_COMMAND_SMF@">segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</token>
+    <token name="@VERSION_COMMAND_UTILS@">segmentation-fold-utils --version</token>
+
+
+    <token name="@REQUIREMENTS_UTILS@"><![CDATA[
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    ]]></token>
+
+    <token name="@REQUIREMENTS_SMF@"><![CDATA[
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
+    ]]></token>
+
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+               @mastersthesis{mastersthesis,
+                  author       = {Youri Hoogstrate}, 
+                  title        = {An algorithm for predicting RNA 2D structures including K-turns},
+                  school       = {University of Technology Delft, Leiden University},
+                  year         = 2012,
+                  address      = {},
+                  month        = 11,
+                  note         = {Research assignment for Master Computer-science},
+                  url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff scan-for-segments.xml
--- a/scan-for-segments.xml Thu Mar 31 04:26:12 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,105 +0,0 @@
-<tool id="scan_for_segments" name="scan-for-segments" version="1.6.3-1">
-    <description>Scan for the presence of segments in sequences using segmentation-fold</description>
-    
-    <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
-        <requirement type="package" version="2.7.10">python</requirement>
-    </requirements>
-    
-    <stdio></stdio>
-    
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
-    
-    <command><![CDATA[
-        scan-for-segments
-            -T \${GALAXY_SLOTS:-4}
-            -x
-            #if str($parameters.use_custom_xml) == "true"
-                "${parameters.input_xml}"
-            #else
-                "\$SEGMENTATION_FOLD_DEFAULT_XML"
-            #end if
-            -p "in-depth"
-            "${input_fasta}"
-            
-            >   $output_list
-    ]]></command>
-
-    <inputs>
-        <param name="input_fasta"
-               type="data"
-               format="fasta"
-               argument="-f"
-               label="Fasta file with RNA-sequece" />
-
-        <conditional name="parameters">
-            <param name="use_custom_xml"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Use segment definition from history" />
-
-            <when value="false" />
-            <when value="true">
-                <param name="input_xml"
-                    type="data"
-                    format="xml"
-                    multiple="false"
-                    argument="-x"
-                    label="Custom 'segments.xml'-syntaxed file" />
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="text" name="output_list" label="${tool.name} on ${str($input_fasta.hid) + ': ' + $input_fasta.name}" />
-    </outputs>
-
-    <tests>
-        <test>
-            <param name="input_fasta" value="SNORD13-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD48-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD114-4-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD118-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" />
-        </test>
-    </tests>
-    
-    <help><![CDATA[
-This is an utility of the segmentation-fold package that allows to scan for the presence of certain segments.
-If present, it will also scan for the Gibbs free energy necessairy the segment has to provide to contribute to the optimal structure.
-    ]]></help>
-    
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
-</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff segmentation-fold.xml
--- a/segmentation-fold.xml Thu Mar 31 04:26:12 2016 -0400
+++ b/segmentation-fold.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -1,13 +1,16 @@
-<tool id="segmentation_fold" name="segmentation-fold" version="1.6.3-1">
+<tool id="segmentation_fold" name="segmentation-fold" version="@VERSION@-1">
     <description>RNA-Folding including predefined segments including K-turns</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     
     <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
     </requirements>
     
-    <stdio></stdio>
+    <expand macro="stdio" />
     
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
+    <version_command>@VERSION_COMMAND_SMF@</version_command>
     
     <command><![CDATA[
         segmentation-fold
@@ -234,18 +237,5 @@
 Youri Hoogstrate (yhoogstrate @ github)
     ]]></help>
     
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_01.in.bam
b
Binary file test-data/DBNFile.test_01.in.bam has changed
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_01.in.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_01.in.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,8 @@
+>chr1:10-21 x unknown-01
+GGGGAAACCCC
+((((...)))) ((.((.)).)) -2.5
+((.((.)).)) (((((.))))) -3.5
+>chr1:25-36 x unknown-01
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01
+AAAAAAAAAAA
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_02.in.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.in.bed Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,6 @@
+chr1 0 1 firstbase 0 +
+chr1 0 5 1-2-3-4-5 0 +
+chr1 5 10 6-7-8-9-10 0 +
+chr1 10 11 hideme 0 +
+chr2 0 5 hideme2 0 +
+chr2 5 10 hideme3 0 +
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_02.in.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.in.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,6 @@
+>chr1:0-10 x unknown-01
+AAAAAAAAAA
+>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1)
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2)
+AAAAAAAAAAA
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_02.out.n.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.out.n.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,4 @@
+>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1)
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2)
+AAAAAAAAAAA
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_02.out.o.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.out.o.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,2 @@
+>chr1:0-10 x unknown-01 (overlap in tests/test-data/DBNFile.test_02.in.bed: firstbase,1-2-3-4-5,6-7-8-9-10)
+AAAAAAAAAA
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_03.in.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.in.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,42 @@
+>chr3.rna:5-35(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna:5-35(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........ .((...(((((...........))))))). -13.125
+>chr3.rna:5-80(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:5-80(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+>chr3.rna:50-80(+) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+...((............(((((.((.((((.....)))).))))))).((((((...........)))))))) ...((..((((((............))...)))).(((......))).((((((...........)))))))) 0.0
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))...... ..(((..((((((............))...))))...........(((((((((...........)))))))))...)))....... 0.0
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125
+>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........ .((...(((((...........))))))). -13.125
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_03.out.l.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.out.l.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,14 @@
+>chr3.rna:5-80(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+...((............(((((.((.((((.....)))).))))))).((((((...........)))))))) ...((..((((((............))...)))).(((......))).((((((...........)))))))) 0.0
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))...... ..(((..((((((............))...))))...........(((((((((...........)))))))))...)))....... 0.0
+>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/DBNFile.test_03.out.s.dbn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.out.s.dbn Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,28 @@
+>chr3.rna:5-35(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna:5-35(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........ .((...(((((...........))))))). -13.125
+>chr3.rna:5-80(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125
+>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........ .((...(((((...........))))))). -13.125
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/ExtractBoxedSequences.test_01.in.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.in.bed Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,6 @@
+chr10 0 7 box1-f:NRTGATG 0 +
+chr10 14 18 box2-f:CTGA 0 +
+chr10 28 35 box1-f:NRTGATG 0 +
+chr10 42 46 box2-f:CTGA 0 +
+chr10 56 63 box1-f:NRTGATG 0 +
+chr10 70 74 box2-f:CTGA 0 +
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/ExtractBoxedSequences.test_01.in.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.in.fa Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,12 @@
+>chr10
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/ExtractBoxedSequences.test_01.out.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.out.fa Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,12 @@
+>chr10:0-18(+)
+AATGATGaaaaaaaCTGA
+>chr10:0-46(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:0-74(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:28-46(+)
+AATGATGaaaaaaaCTGA
+>chr10:28-74(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:56-74(+)
+AATGATGaaaaaaaCTGA
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/FindBoxes.genome.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FindBoxes.genome.fa Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,44 @@
+>chr1
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+>chr2
+aaaaaCTGAaaaaaaaCTGAaaaaa
+>chr3
+CTGAaaaaaaaCTGA
+>chr4
+CTGACTGA
+>chr5
+TCAGaaaaaaTCAG
+>chr6
+tcagAAAAAAtcag
+>chr7
+AATGATG
+CATGATG
+TATGATG
+GATGATG
+AGTGATG
+CGTGATG
+TGTGATG
+GGTGATG
+>chr8_no_valid_C_boxes
+ACTGATG
+CCTGATG
+TCTGATG
+GCTGATG
+ATTGATG
+CTTGATG
+TTTGATG
+GTTGATG
+>chr9
+CATCACCCATCACACATCACGCATCACTCATCATCCATCATACATCATGCATCATT
+>chr10
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/FindBoxes.test_02.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FindBoxes.test_02.bed Thu Jul 28 10:25:37 2016 -0400
b
@@ -0,0 +1,36 @@
+chr2 5 9 box2-f:CTGA 0 +
+chr2 16 20 box2-f:CTGA 0 +
+chr3 0 4 box2-f:CTGA 0 +
+chr3 11 15 box2-f:CTGA 0 +
+chr4 0 4 box2-f:CTGA 0 +
+chr4 4 8 box2-f:CTGA 0 +
+chr5 0 4 box2-r:TCAG 0 -
+chr5 10 14 box2-r:TCAG 0 -
+chr6 0 4 box2-r:TCAG 0 -
+chr6 10 14 box2-r:TCAG 0 -
+chr7 0 7 box1-f:NRTGATG 0 +
+chr7 7 14 box1-f:NRTGATG 0 +
+chr7 14 21 box1-f:NRTGATG 0 +
+chr7 21 28 box1-f:NRTGATG 0 +
+chr7 28 35 box1-f:NRTGATG 0 +
+chr7 35 42 box1-f:NRTGATG 0 +
+chr7 42 49 box1-f:NRTGATG 0 +
+chr7 49 56 box1-f:NRTGATG 0 +
+chr8_no_valid_C_boxes 1 5 box2-f:CTGA 0 +
+chr8_no_valid_C_boxes 8 12 box2-f:CTGA 0 +
+chr8_no_valid_C_boxes 15 19 box2-f:CTGA 0 +
+chr8_no_valid_C_boxes 22 26 box2-f:CTGA 0 +
+chr9 0 7 box1-r:CATCAYN 0 -
+chr9 7 14 box1-r:CATCAYN 0 -
+chr9 14 21 box1-r:CATCAYN 0 -
+chr9 21 28 box1-r:CATCAYN 0 -
+chr9 28 35 box1-r:CATCAYN 0 -
+chr9 35 42 box1-r:CATCAYN 0 -
+chr9 42 49 box1-r:CATCAYN 0 -
+chr9 49 56 box1-r:CATCAYN 0 -
+chr10 0 7 box1-f:NRTGATG 0 +
+chr10 14 18 box2-f:CTGA 0 +
+chr10 28 35 box1-f:NRTGATG 0 +
+chr10 42 46 box2-f:CTGA 0 +
+chr10 56 63 box1-f:NRTGATG 0 +
+chr10 70 74 box2-f:CTGA 0 +
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/SNORD114-4-revised.scan-for-segments.txt
--- a/test-data/SNORD114-4-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD114-4-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,74 +1,57 @@
->SNORD114-4 x Kt-42.dra
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.eco
+>SNORD114-4 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Loop-E-Motif.bac
+>SNORD114-4 revised x Kt-7 (T. thermophilus)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.UAU
+>SNORD114-4 revised x Kt-7 (E. coli)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.UGU
+>SNORD114-4 revised x Kt-7 (D. radiodurans)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.CAU
+>SNORD114-4 revised x Kt-11 (T. thermophilus)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.tth
+>SNORD114-4 revised x Kt-11.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD114-4 revised x Kt-15.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-15.hma
+.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((....(((.((.....((((......))))((....))...)...))))...(((....))).)))))) -6.42471313477
+>SNORD114-4 revised x Kt-23.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
-.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((....(((.((.....((((......))))((....))...)...))))...(((....))).)))))) -6.4197063446
->SNORD114-4 x Kt-U4b.hsa
+>SNORD114-4 revised x Kt-23.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.GGU
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (E. coli)
+>SNORD114-4 revised x Kt-38.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (D. radiodurans)
+>SNORD114-4 revised x Kt-42.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-58.hma
+>SNORD114-4 revised x Kt-42.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.tth
+>SNORD114-4 revised x Kt-42.dra
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-SAM-ribo.tte
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-23.eco
+>SNORD114-4 revised x Kt-42.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-38.hma
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.dra
+>SNORD114-4 revised x Kt-46.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.CGU
+>SNORD114-4 revised x Kt-46.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-T-box.bsu
+>SNORD114-4 revised x Kt-46.dra
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-23.tth
+>SNORD114-4 revised x Kt-46.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.eco
+>SNORD114-4 revised x Kt-58.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-c-di-GMP-II.cac
+>SNORD114-4 revised x Kt-U4a.hsa
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-11 (T. thermophilus)
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.AAU
+>SNORD114-4 revised x Kt-U4b.hsa
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-L30e.sce
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (T. thermophilus)
+>SNORD114-4 revised x Kt-CD-box.CGU
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.hma
+>SNORD114-4 revised x Kt-CD-box.UGU
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-U4a.hsa
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-G2nA-SAMribo.bsu
+>SNORD114-4 revised x Kt-L30e.sce
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.GAU
+>SNORD114-4 revised x Kt-SAM-ribo.tte
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-11.eco
+>SNORD114-4 revised x Kt-T-box.bsu
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.hma
+>SNORD114-4 revised x Kt-c-di-GMP-II.cac
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.AGU
+>SNORD114-4 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
-.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((...(((((.......((((......)))).((((((..........)))))).....))))))))))) 1.99971199036
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/SNORD118-revised.scan-for-segments.txt
--- a/test-data/SNORD118-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD118-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,75 +1,57 @@
->SNORD118 x Kt-42.dra
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-42.eco
+>SNORD118 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Loop-E-Motif.bac
+>SNORD118 revised x Kt-7 (T. thermophilus)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.UAU
+>SNORD118 revised x Kt-7 (E. coli)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.UGU
+>SNORD118 revised x Kt-7 (D. radiodurans)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.CAU
+>SNORD118 revised x Kt-11 (T. thermophilus)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) .(((...((((((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).))))))))). 3.380651474
->SNORD118 x Kt-42.tth
+>SNORD118 revised x Kt-11.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
+>SNORD118 revised x Kt-15.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 G2nA SAM riboswitch (H. marismortui)
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-15.hma
+>SNORD118 revised x Kt-23.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-U4b.hsa
+>SNORD118 revised x Kt-23.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.GGU
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (E. coli)
+>SNORD118 revised x Kt-38.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (D. radiodurans)
+>SNORD118 revised x Kt-42.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-58.hma
+>SNORD118 revised x Kt-42.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.tth
+>SNORD118 revised x Kt-42.dra
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-SAM-ribo.tte
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-23.eco
+>SNORD118 revised x Kt-42.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-38.hma
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.dra
+>SNORD118 revised x Kt-46.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.CGU
+>SNORD118 revised x Kt-46.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((((...((((((............))...))))...))......(((((((((...........))))))))).....)))...... -15.021024704
->SNORD118 x Kt-T-box.bsu
+>SNORD118 revised x Kt-46.dra
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-23.tth
+>SNORD118 revised x Kt-46.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.eco
+>SNORD118 revised x Kt-58.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-c-di-GMP-II.cac
+>SNORD118 revised x Kt-U4a.hsa
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-11 (T. thermophilus)
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.AAU
+>SNORD118 revised x Kt-U4b.hsa
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-L30e.sce
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (T. thermophilus)
+>SNORD118 revised x Kt-CD-box.CGU
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-42.hma
+.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((((...((((((............))...))))...))......(((((((((...........))))))))).....)))...... -15.038848877
+>SNORD118 revised x Kt-CD-box.UGU
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-U4a.hsa
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-G2nA-SAMribo.bsu
+>SNORD118 revised x Kt-L30e.sce
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.GAU
+>SNORD118 revised x Kt-SAM-ribo.tte
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((.(((...(((((......))))))((((.....))))...)).(((((((((...........))))))))).....)))...... -8.30012321472
->SNORD118 x Kt-11.eco
+>SNORD118 revised x Kt-T-box.bsu
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.hma
+>SNORD118 revised x Kt-c-di-GMP-II.cac
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.AGU
+>SNORD118 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/SNORD13-revised.scan-for-segments.txt
--- a/test-data/SNORD13-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD13-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,74 +1,57 @@
->SNORD13 x Kt-42.dra
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.eco
+>SNORD13 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Loop-E-Motif.bac
+>SNORD13 revised x Kt-7 (T. thermophilus)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.UAU
+>SNORD13 revised x Kt-7 (E. coli)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.UGU
+>SNORD13 revised x Kt-7 (D. radiodurans)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.CAU
+>SNORD13 revised x Kt-11 (T. thermophilus)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.tth
+>SNORD13 revised x Kt-11.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD13 revised x Kt-15.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-15.hma
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-U4b.hsa
+>SNORD13 revised x Kt-23.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.GGU
+>SNORD13 revised x Kt-23.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (E. coli)
+>SNORD13 revised x Kt-38.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (D. radiodurans)
+>SNORD13 revised x Kt-42.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-58.hma
+>SNORD13 revised x Kt-42.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.tth
+>SNORD13 revised x Kt-42.dra
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-SAM-ribo.tte
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-23.eco
+>SNORD13 revised x Kt-42.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-38.hma
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.dra
+>SNORD13 revised x Kt-46.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.CGU
+>SNORD13 revised x Kt-46.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
-(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) (((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..))))))))))))))))))))))))) 3.35026359558
->SNORD13 x Kt-T-box.bsu
+>SNORD13 revised x Kt-46.dra
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-23.tth
+>SNORD13 revised x Kt-46.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.eco
+>SNORD13 revised x Kt-58.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-c-di-GMP-II.cac
+>SNORD13 revised x Kt-U4a.hsa
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-11 (T. thermophilus)
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.AAU
+>SNORD13 revised x Kt-U4b.hsa
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-L30e.sce
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (T. thermophilus)
+>SNORD13 revised x Kt-CD-box.CGU
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.hma
+(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) (((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..))))))))))))))))))))))))) 3.37054443359
+>SNORD13 revised x Kt-CD-box.UGU
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-U4a.hsa
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-G2nA-SAMribo.bsu
+>SNORD13 revised x Kt-L30e.sce
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.GAU
+>SNORD13 revised x Kt-SAM-ribo.tte
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
-(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) (((((((.((((...((((((((((....))))))..)))))...)))))............((((.((((.((........)))))))))).))))) -9.91994667053
->SNORD13 x Kt-11.eco
+>SNORD13 revised x Kt-T-box.bsu
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.hma
+>SNORD13 revised x Kt-c-di-GMP-II.cac
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.AGU
+>SNORD13 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/SNORD48-revised.scan-for-segments.txt
--- a/test-data/SNORD48-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD48-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,74 +1,56 @@
->SNORD48 x Kt-42.dra
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 G2nA SAM riboswitch (H. marismortui)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Loop-E-Motif.bac
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (T. thermophilus)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.UAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (E. coli)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.UGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (D. radiodurans)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.CAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-11 (T. thermophilus)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-11.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-15.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-15.hma
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-U4b.hsa
+>SNORD48 revised (2 extra bases on 5' end) x Kt-23.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.GGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-23.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (E. coli)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-38.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (D. radiodurans)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-58.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.dra
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-SAM-ribo.tte
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-23.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-38.hma
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.dra
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.CGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-T-box.bsu
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.dra
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-23.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-58.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-c-di-GMP-II.cac
+>SNORD48 revised (2 extra bases on 5' end) x Kt-U4a.hsa
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-11 (T. thermophilus)
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.AAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-U4b.hsa
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-L30e.sce
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (T. thermophilus)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.CGU
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.UGU
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-U4a.hsa
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-G2nA-SAMribo.bsu
+>SNORD48 revised (2 extra bases on 5' end) x Kt-L30e.sce
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.GAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-SAM-ribo.tte
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
-((((((.((.((((((((((.....)))).).).)))).((((...))))...)).)))))).... (((((...(((((((...))).....))))).....(((((............))))))))).... -10.9206504822
->SNORD48 x Kt-11.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-T-box.bsu
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-c-di-GMP-II.cac
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.AGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
-((((((.((.((((((((((.....)))).).).)))).((((...))))...)).)))))).... ((...(((((..((..((((.....)))).))((((.((((((...))))..)).))))))))))) -1.84962844849
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/segments_truncated.2.out.txt
--- a/test-data/segments_truncated.2.out.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/segments_truncated.2.out.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,5 +1,7 @@
->C/D-box snoRNA (shuffle iteration 1)
-agaggCGUGAUcccaacgUGAuggc
-....((((.......))))...... ....(...((((.....)))))... -9.3900680542
->Artificial double C/D K-turn construct (shuffle iteration 1)
-uguucugucacggcacauaccuccggUGUGAUggUGAauaguaUGAgaaguaucgugugucagaggcccuaaUGUGAUgccuuaa
+>C/D-box snoRNA x Kt-CD-box.CGU
+GCUCUGACCGAAAGGCGUGAUGAGC
+((((....((.....))....)))) (((((((((....))...))))))) 2.49938964844
+>Artificial double C/D K-turn construct x Kt-CD-box.UGU
+GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff test-data/segments_truncated.out.txt
--- a/test-data/segments_truncated.out.txt Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/segments_truncated.out.txt Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,6 +1,7 @@
->C/D-box snoRNA
+>C/D-box snoRNA x Kt-CD-box.CGU
 GCUCUGACCGAAAGGCGUGAUGAGC
-((((....((.....))....)))) (((((((((....))...))))))) 2.50072479248
->Artificial double C/D K-turn construct
+((((....((.....))....)))) (((((((((....))...))))))) 2.49938964844
+>Artificial double C/D K-turn construct x Kt-CD-box.UGU
 GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC
-((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.25102996826
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff tool_dependencies.xml
--- a/tool_dependencies.xml Thu Mar 31 04:26:12 2016 -0400
+++ b/tool_dependencies.xml Thu Jul 28 10:25:37 2016 -0400
b
@@ -1,10 +1,24 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="segmentation-fold" version="1.6.3">
-        <repository changeset_revision="f4981e860e2c" name="package_segmentation_fold_1_6_3" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="segmentation-fold" version="1.6.5">
+        <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     
+
     <package name="python" version="2.7.10">
         <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
+    <package name="numpy" version="1.9">
+        <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="pysam" version="0.8.2.1">
+        <repository changeset_revision="f8fecf1f6eba" name="package_pysam_0_8_2" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="htseq" version="0.6.1">
+        <repository changeset_revision="fbb72996807d" name="package_htseq_0_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    
+    <package name="segmentation-fold-utils" version="2.0.1">
+        <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
 </tool_dependency>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_add-read-counts.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_add-read-counts.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,94 @@
+<tool id="smf_utils_add-read-counts" name="add-read-counts" version="@VERSION@-3">
+    <description>Annotate sequences by adding the read counts from a bam file, within a region contained in the fasta header of the dbn file</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        ## Link the BAM file and its index side by side into the job working
+        ## directory instead of creating a symlink next to the dataset file.
+        ln -f -s '${bam_input_file}' input.bam &&
+        ln -f -s '${bam_input_file.metadata.bam_index}' input.bam.bai &&
+        
+        segmentation-fold-utils
+            add-read-counts
+                ## The sanitizer of the 'regex' parameter does not let single
+                ## quotes through, so plain single-quoting is sufficient here.
+                --regex '$regex'
+                '$dbn_input_file'
+                input.bam
+                '$dbn_output_file'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt,text"
+               label="Input DBN file"
+               help="The 'fasta'-headers should contain the genomic position being used to find overlapping reads in the BAM file"/>
+        <param name="bam_input_file"
+               type="data"
+               format="bam"
+               label="Input BAM file"/>
+        <param name="regex"
+               type="text"
+               argument="--regex"
+               value='>.*?(chr[^:]+):([0-9]+)-([0-9]+)'
+               label="Regex to capture the targeted location in DBN file"
+               help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]+):([0-9]+)-([0-9]+)'">
+            <!-- Galaxy's default text sanitizer would rewrite '>', '[' and ']' and thereby break the regex;
+                 allow all printable characters except the single quote, which is used for shell quoting -->
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_01.in.dbn" ftype="dbn"/>
+            <param name="bam_input_file" value="DBNFile.test_01.in.bam" ftype="bam"/>
+            <param name="regex" value='>.*?(chr[^:]+):([0-9]+)-([0-9]+)'/>
+
+            <output name="dbn_output_file">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:10-21 x unknown-01 \(aligned reads .*?: 20\)"/>
+                    <has_line line="GGGGAAACCCC"/>
+                    <has_line line="((((...))))&#009;((.((.)).))&#009;-2.5"/>
+                    <has_line line="((.((.)).))&#009;(((((.)))))&#009;-3.5"/>
+                    
+                    <has_line_matching expression="&gt;chr1:25-36 x unknown-01 \(aligned reads.*?: 1\)"/>
+                    <has_line line="AAAAAAAAAAA"/>
+                    
+                    <has_line_matching expression="&gt;chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+This is a utility of the segmentation-fold package
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_estimate-energy.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_estimate-energy.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,144 @@
+<tool id="smf_utils_estimate-energy" name="estimate-energy" version="@VERSION@-2">
+    <description>Estimates whether a certain Segment(Loop) is present and for which delta-G this transition takes place</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>echo $(@VERSION_COMMAND_SMF@)", "$(@VERSION_COMMAND_UTILS@)</version_command>
+    
+    <command><![CDATA[
+        segmentation-fold-utils
+            estimate-energy
+                -T \${GALAXY_SLOTS:-2}
+                -x
+                #if str($parameters.use_custom_xml) == "true"
+                    "${parameters.input_xml}"
+                #else
+                    "\$SEGMENTATION_FOLD_DEFAULT_XML"
+                #end if
+                
+                -p $precision
+                -r $randomize
+                
+                #if $sequences_from_fasta_file:
+                    --sequences-from-fasta-file "${sequences_from_fasta_file}"
+                #end if
+                
+                '$output_list'
+    ]]></command>
+
+    <inputs>
+        <conditional name="parameters">
+            <param name="use_custom_xml"
+                type="boolean"
+                truevalue="true"
+                falsevalue="false"
+                checked="false"
+                label="Use segment definition from history" />
+
+            <when value="false" />
+            <when value="true">
+                <param name="input_xml"
+                    type="data"
+                    format="xml"
+                    multiple="false"
+                    argument="-x"
+                    label="Custom 'segments.xml'-syntaxed file" />
+            </when>
+        </conditional>
+        
+        <param name="precision"
+               type="float"
+               value="0.05"
+               min="0"
+               argument="--precision"
+               label="Precision"
+               help="Minimal difference for binary split - the smaller this value the slower. If this value equals 0, the difference is set to infinity (default: 0.05)" />
+
+        <param name="randomize"
+               type="integer"
+               value="0"
+               min="0"
+               argument="--randomize"
+               label="Shuffle each sequence this many times and predict energy of shuffled sequence(s) (default: 0, 0 means disabled)" />
+        
+        <param name="sequences_from_fasta_file"
+               type="data"
+               format="fasta"
+               multiple="false"
+               optional="true"
+               argument="--sequences-from-fasta-file"
+               label="Optional sequences to scan for Segment(Loop)s (FASTA)"
+               help="Use sequences from a FASTA file instead of the XML file that contains the segments. In XML files you can explicitly link one Segment(Loop) to one particular sequence instead of doing n*n comparisons (default: None)" />
+    </inputs>
+
+    <outputs>
+        <data format="dbn" name="output_list" label="${tool.name}" />
+    </outputs>
+
+    <tests>
+        <!-- xml * fasta mode -->
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD13-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD48-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD114-4-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD118-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" />
+        </test>
+        
+        
+        <!-- xml * xml mode -->
+        <test>
+            <param  name="use_custom_xml" value="true" />
+            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
+            <param  name="randomize" value="0" />
+            
+            <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" />
+        </test>
+        <test>
+            <!-- NOTE(review): this test used to set 'shuffle_n_times', which is not an input of this tool (presumably ignored);
+                 the expected file holds unshuffled output, so 'randomize' stays 0 until that file is regenerated -->
+            <param  name="use_custom_xml" value="true" />
+            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
+            <param  name="randomize" value="0" />
+            
+            <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+This is a utility of the segmentation-fold package that allows to scan for the presence of certain segments.
+If present, it will also scan for the Gibbs free energy the segment has to provide in order to contribute to the optimal structure.
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_extract-boxed-sequences.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_extract-boxed-sequences.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,114 @@
+<tool id="smf_utils_extract-boxed-sequences" name="extract-boxed-sequences" version="@VERSION@-1">
+    <description>Extracts boxed sequences from bed_input_file which has to be created with 'find-boxes', part of this utility</description>
+    
+    <!-- Galaxy wrapper for the 'extract-boxed-sequences' sub-command of segmentation-fold-utils.
+         The VERSION tokens and the 'stdio' and 'citations' macros used below are expected to be provided by macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <!-- The two options are filled from integer parameters. The three positional arguments are, in order:
+         the reference FASTA, the BED file holding the boxes and the FASTA output dataset. -->
+    <command><![CDATA[
+        segmentation-fold-utils
+            extract-boxed-sequences
+
+                --max-inner-dist $max_inner_dist
+                --bp-extension   $bp_extension
+                
+                '${fasta_input_file}'
+                '${bed_input_file}'
+                '${fasta_output_file}'
+
+    ]]></command>
+
+    <inputs>
+        <param name="fasta_input_file"
+               type="data"
+               format="fasta"
+               label="Genomic reference FASTA file"/>
+        <param name="bed_input_file"
+               type="data"
+               format="bed"
+               label="BED file containing the sequence boxes"
+               help="This file should have been created with 'find-boxes'"/>
+        
+        <param name="max_inner_dist"
+               type="integer"
+               min="0"
+               value="250"
+               label="Maximal distance between the boxes"
+               help="(default=250bp)"/>
+        <param name="bp_extension"
+               type="integer"
+               min="0"
+               value="10"
+               label="Extend extracted sequences with this number of bases"
+               help="(default: 10bp)"/>
+    </inputs>
+
+    <outputs>
+        <data name="fasta_output_file"
+              format="fasta"
+              label="${tool.name} on ${fasta_input_file.hid}: ${fasta_input_file.name}"/>
+    </outputs>
+
+    <tests>
+        <!-- Non-default distance (100) and no extension (0), compared against a stored FASTA file -->
+        <test>
+            <param name="fasta_input_file" value="ExtractBoxedSequences.test_01.in.fa" ftype="fasta"/>
+            <param name="bed_input_file" value="ExtractBoxedSequences.test_01.in.bed" ftype="bed"/>
+            <param name="max_inner_dist" value="100"/>
+            <param name="bp_extension" value="0"/>
+
+            <output name="fasta_output_file" file="ExtractBoxedSequences.test_01.out.fa"/>
+        </test>
+    </tests>
+
+    <!-- The help is rendered as reStructuredText: examples are literal blocks ('::' plus indentation)
+         and the nested bullet list is separated from its parent item by a blank line. -->
+    <help><![CDATA[
+extract-boxed-sequences
+-----------------------
+*Extracts boxed sequences from `bed_input_file` which has to be created with 'find-boxes', also part of this utility*
+
+The user can use this utility to extract sequences containing the boxes provided in the bed file by `find-boxes`.
+
+**input**
+
+Important information about the input:
+
+- `FASTA_INPUT_FILE` can be any generic FASTA file that can be read with pysam. This means that if the sequence is split into multiple lines, they must all be at the same length.
+- `BED_INPUT_FILE` the bed file should be provided by `find-boxes` as it properly denotes the names (box1-f, box1-r, box2-f and box2-r) which are used for extraction.
+- `-d, --max-inner-dist INTEGER` Only sequences for which the distance in bases between the boxes is smaller than this distance, will be extracted. Boxes are excluded from this distance.
+- `-e, --bp-extension INTEGER` Each sequence will be extended with:
+
+  * The boxes
+  * An optional number of bases provided with this argument
+
+**output**
+
+Be aware that there can be overlapping sequences. For example, if you started box1=`TTTT` and box2=`CCCC` with the following sequence, you will extract 2 sequences::
+
+    >seq
+    gagagaTTTTgagagaTTTTgagagagagagagagaCCCCgaga
+
+Namely (the second sequence is aligned below the first)::
+
+    TTTTgagagaTTTTgagagagagagagagaCCCC
+              TTTTgagagagagagagagaCCCC
+
+This is a utility of the segmentation-fold package
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_filter-annotated-entries.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_filter-annotated-entries.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,84 @@
+<tool id="smf_utils_filter-annotated-entries" name="filter-annotated-entries" version="@VERSION@-1">
+    <description>Split entries into two files based on whether they overlap annotations in a bed file</description>
+    
+    <!-- Galaxy wrapper for the 'filter-annotated-entries' sub-command of segmentation-fold-utils.
+         The VERSION tokens and the 'stdio' and 'citations' macros used below are expected to be provided by macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <!-- The regex is placed inside a single-quoted shell string. A backslash cannot escape a single quote
+         in such a string, so no escaping is attempted here; instead the sanitizer on the 'regex' parameter
+         guarantees that the substituted value never contains a single quote. -->
+    <command><![CDATA[
+        segmentation-fold-utils
+            filter-annotated-entries
+                --regex '${regex}'
+                '$dbn_input_file'
+                '$bed_input_file'
+                '$dbn_output_file_overlapping'
+                '$dbn_output_file_non_overlapping'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt"
+               label="Input DBN file"
+               help="The 'fasta'-headers should contain the genomic position being used to find overlapping annotations in the BED file"/>
+        <param name="bed_input_file"
+               type="data"
+               format="bed"
+               label="The results will be filtered based on overlap with annotations in this BED file"/>
+        <!-- NOTE(review): in the default pattern 'chr[^:]' matches exactly one character after 'chr', so names
+             such as chr10 or chrUn_xyz are not captured by it. The value is left unchanged here because widening
+             it can alter which text is captured; confirm the intended pattern with the upstream utility. -->
+        <param name="regex"
+               type="text"
+               argument="--regex"
+               value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'
+               label="Regex to capture the targeted location in DBN file"
+               help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]):([0-9]+)-([0-9]+)'">
+            <!-- Keep regex metacharacters intact, but never let a single quote through, because it would
+                 terminate the single-quoted shell string in the command. -->
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file_overlapping"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - overlapping entries"/>
+        <data name="dbn_output_file_non_overlapping"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - non overlapping entries"/>
+    </outputs>
+    
+    <tests>
+        <!-- Headers carry a variable part, so both outputs are checked with line assertions instead of a file diff -->
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_02.in.dbn" ftype="dbn"/>
+            <param name="bed_input_file" value="DBNFile.test_02.in.bed" ftype="bed"/>
+            <param name="regex" value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'/>
+
+            <output name="dbn_output_file_overlapping">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:0-10 x unknown-01 \(overlap in .*?: firstbase,1-2-3-4-5,6-7-8-9-10\)"/>
+                    <has_line line="AAAAAAAAAA"/>
+                </assert_contents>
+            </output>
+            <output name="dbn_output_file_non_overlapping">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:25-36 x unknown-01 \(aligned reads .*?: 1\)"/>
+                    <has_line line="AAAAAAAAAAA"/>
+                    
+                    <has_line_matching expression="&gt;chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Filter based on whether the entries in the DBN file are already annotated or not.
+
+Entries that overlap an annotation in the BED file are written to the 'overlapping entries' output, all other entries to the 'non overlapping entries' output.
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_filter-by-energy.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_filter-by-energy.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,66 @@
+<tool id="smf_utils_filter-by-energy" name="filter-by-energy" version="@VERSION@-1">
+    <description>Split entries over two files based on the estimated energy</description>
+    
+    <!-- Galaxy wrapper for the 'filter-by-energy' sub-command of segmentation-fold-utils.
+         The VERSION tokens and the 'stdio' and 'citations' macros used below are expected to be provided by macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <!-- Positional arguments, in order: input DBN, output for 'larger or equal', output for 'smaller' -->
+    <command><![CDATA[
+        segmentation-fold-utils
+            filter-by-energy
+                --energy ${energy}
+                '$dbn_input_file'
+                '$dbn_output_file_larger_or_equal'
+                '$dbn_output_file_smaller'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt"
+               label="Input DBN file"
+               help="DBN file of which the entries will be split based on the energy of their transitions"/>
+        <!-- NOTE(review): the help text below pairs 'smaller than e' with DBN_OUTPUT_FILE_LARGER_OR_EQUAL and
+             'larger or equal' with DBN_OUTPUT_FILE_SMALLER, which looks swapped relative to the output names.
+             It is kept verbatim; confirm against the segmentation-fold-utils implementation before rewording. -->
+        <param name="energy"
+               type="float"
+               value="0.0"
+               argument="--energy"
+               label="Energy value"
+               help="Entries with transitions with energy smaller than energy (&lt; e) or without transitions will be put into DBN_OUTPUT_FILE_LARGER_OR_EQUAL and those larger or equal (&gt;= e) to DBN_OUTPUT_FILE_SMALLER" />
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file_larger_or_equal"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy larger/equal than selected"/>
+        <data name="dbn_output_file_smaller"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy smaller than selected"/>
+    </outputs>
+    
+    <tests>
+        <!-- Split at the default threshold (0.0) and compare both outputs against stored DBN files -->
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_03.in.dbn" ftype="dbn"/>
+            <param name="energy" value="0.0"/>
+
+            <output name="dbn_output_file_larger_or_equal" file="DBNFile.test_03.out.l.dbn" />
+            <output name="dbn_output_file_smaller" file="DBNFile.test_03.out.s.dbn" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Splits the entries of a DBN file over two output files based on the estimated energy of their transitions, relative to the selected energy value.
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
b
diff -r cd1bba1c66b3 -r 63df1e23f4ff utils_find-boxes.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_find-boxes.xml Thu Jul 28 10:25:37 2016 -0400
[
@@ -0,0 +1,90 @@
+<tool id="smf_utils_find-boxes" name="find-boxes" version="@VERSION@-1">
+    <description>Finds all occurrences of two given boxes (sequence motifs) within a FASTA file</description>
+    
+    <!-- Galaxy wrapper for the 'find-boxes' sub-command of segmentation-fold-utils.
+         The VERSION tokens and the 'stdio' and 'citations' macros used below are expected to be provided by macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    
+    <expand macro="stdio" />
+
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <!-- The two boolean parameters expand to their truevalue or falsevalue flag; the FASTA input and
+         the BED output are passed as positional arguments, in that order. -->
+    <command><![CDATA[
+        segmentation-fold-utils
+            find-boxes
+                --box1 '${box1}'
+                --box2 '${box2}'
+                $forward
+                $reverse
+                '${fasta_input}'
+                '${bed_output}'
+    ]]></command>
+
+    <inputs>
+        <!-- Passed positionally on the command line, hence no 'argument' attribute -->
+        <param name="fasta_input"
+               type="data"
+               format="fasta"
+               label="Fasta file with RNA-sequence" />
+
+        <param name="box1"
+               type="text"
+               value="NRUGAUG"
+               argument="--box1"
+               label="Sequence of box1 (default = C-box: 'NRUGAUG')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="box2"
+               type="text"
+               value="CUGA"
+               argument="--box2"
+               label="Sequence of box2 (default = D-box: 'CUGA')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="forward"
+               type="boolean"
+               truevalue="--forward"
+               falsevalue="--no-forward"
+               checked="true"
+               label="Search in the forward direction of the reference sequence" />
+
+        <param name="reverse"
+               type="boolean"
+               truevalue="--reverse"
+               falsevalue="--no-reverse"
+               checked="true"
+               label="Search in the reverse complement of the reference sequence" />
+    </inputs>
+
+    <outputs>
+        <data format="bed"
+              name="bed_output"
+              label="${tool.name} on ${str($fasta_input.hid) + ': ' + $fasta_input.name}" />
+    </outputs>
+
+    <tests>
+        <!-- Default boxes, searching both strands. Boolean test parameters take true/false; the
+             truevalue and falsevalue strings are only what ends up on the command line. -->
+        <test>
+            <param name="fasta_input" value="FindBoxes.genome.fa" ftype="fasta" />
+            <param name="box1" value="NRUGAUG" />
+            <param name="box2" value="CUGA" />
+            <param name="forward" value="true" />
+            <param name="reverse" value="true" />
+            
+            <output name="bed_output" file="FindBoxes.test_02.bed" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Scans a FASTA reference for BOX motifs (like C- and D-box) and reports them in a BED file
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>