changeset 4:63df1e23f4ff draft

planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/segmentation_fold_galaxy_wrapper commit 00690c63c51a7f7563f2428c313d7fa75f2657e5-dirty
author yhoogstrate
date Thu, 28 Jul 2016 10:25:37 -0400
parents cd1bba1c66b3
children b7cf9b172cfe
files energy-estimation-utility.xml macros.xml scan-for-segments.xml segmentation-fold.xml test-data/DBNFile.test_01.in.bam test-data/DBNFile.test_01.in.dbn test-data/DBNFile.test_02.in.bed test-data/DBNFile.test_02.in.dbn test-data/DBNFile.test_02.out.n.dbn test-data/DBNFile.test_02.out.o.dbn test-data/DBNFile.test_03.in.dbn test-data/DBNFile.test_03.out.l.dbn test-data/DBNFile.test_03.out.s.dbn test-data/ExtractBoxedSequences.test_01.in.bed test-data/ExtractBoxedSequences.test_01.in.fa test-data/ExtractBoxedSequences.test_01.out.fa test-data/FindBoxes.genome.fa test-data/FindBoxes.test_02.bed test-data/SNORD114-4-revised.scan-for-segments.txt test-data/SNORD118-revised.scan-for-segments.txt test-data/SNORD13-revised.scan-for-segments.txt test-data/SNORD48-revised.scan-for-segments.txt test-data/segments_truncated.2.out.txt test-data/segments_truncated.out.txt tool_dependencies.xml utils_add-read-counts.xml utils_estimate-energy.xml utils_extract-boxed-sequences.xml utils_filter-annotated-entries.xml utils_filter-by-energy.xml utils_find-boxes.xml
diffstat 31 files changed, 994 insertions(+), 430 deletions(-) [+]
line wrap: on
line diff
--- a/energy-estimation-utility.xml	Thu Mar 31 04:26:12 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-<tool id="energy_estimation_utility" name="energy-estimation-utility" version="1.6.3-1">
-    <description>Estimate the maximal energy a segment needs to assign to become part of the optimal structure using segmentation-fold</description>
-    
-    <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
-        <requirement type="package" version="2.7.10">python</requirement>
-    </requirements>
-    
-    <stdio></stdio>
-    
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
-    
-    <command><![CDATA[
-        energy-estimation-utility
-            #if $parameters.use_custom_xml == "true"
-                -x "${parameters.input_xml}"
-            #else
-                -x "\$SEGMENTATION_FOLD_DEFAULT_XML"
-            #end if
-            
-            -r ${randomization.shuffle_n_times}
-        > "${output_list}"
-    ]]></command>
-    
-    <inputs>
-        <conditional name="parameters">
-            <param name="use_custom_xml"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Use segment definition from history" />
-
-            <when value="false" />
-            <when value="true">
-                <param name="input_xml"
-                    type="data"
-                    format="xml"
-                    multiple="false"
-                    argument="-x"
-                    label="Custom 'segments.xml'-syntaxed file" />
-            </when>
-        </conditional>
-        
-        <conditional name="randomization">
-            <param name="do_randomization"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Randomly shuffle the sequence(s) instead"
-                help="This can be helpful in determining a baseline of observing an energy parameter by chance" />
-
-            <when value="false">
-                <param name="shuffle_n_times"
-                       type="hidden"
-                       value="0"/>
-            </when>
-            <when value="true">
-                <param name="shuffle_n_times"
-                       type="integer"
-                       min="0"
-                       value="10" 
-                       argument="-r"
-                       label="Number of times the sequences have to be shuffled and energy parameters have to be estimated on" />
-            </when>
-        </conditional>
-    </inputs>
-    
-    <outputs>
-        <data format="text" name="output_list" label="${tool.name}" />
-    </outputs>
-    
-    <tests>
-        <test>
-            <param  name="use_custom_xml" value="true" />
-            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
-            <param  name="do_randomization" value="false" />
-            <param  name="shuffle_n_times" value="0" />
-            
-            <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" /><!-- Accept rounding errors by diff CPU's etc. -->
-        </test>
-        <test>
-            <param  name="use_custom_xml" value="true" />
-            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
-            <param  name="do_randomization" value="false" />
-            <param  name="shuffle_n_times" value="1" />
-            
-            <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" />
-        </test>
-    </tests>
-    
-    <help><![CDATA[
-The tool uses the sequences from the xml file.
-    ]]></help>
-    
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,44 @@
+<macros>
+    <token name="@VERSION@">smf-v1.6-5_utils-v2.0.1</token>
+    <!-- Above: @VERSION@ is the shared version string substituted into tool version attributes. Below: the stdio macro matches every non-zero exit code (ranges 1: and :-1). -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+        </stdio>
+    </xml>
+    <!-- Shell snippets for version_command: the SMF variant keeps only the second line of the version output (head/tail) and trims leading/trailing whitespace (sed). -->
+    <token name="@VERSION_COMMAND_SMF@">segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</token>
+    <token name="@VERSION_COMMAND_UTILS@">segmentation-fold-utils --version</token>
+
+    <!-- NOTE(review): the two REQUIREMENTS tokens wrap requirement markup in CDATA; Galaxy tokens are presumably substituted as plain text, so confirm they expand into real requirement elements (an xml macro is the usual form). -->
+    <token name="@REQUIREMENTS_UTILS@"><![CDATA[
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    ]]></token>
+
+    <token name="@REQUIREMENTS_SMF@"><![CDATA[
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
+    ]]></token>
+
+    <!-- Shared citations block (one BibTeX entry). NOTE(review): the googlecode.com URL in the entry may no longer resolve; verify before release. -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+               @mastersthesis{mastersthesis,
+                  author       = {Youri Hoogstrate}, 
+                  title        = {An algorithm for predicting RNA 2D structures including K-turns},
+                  school       = {University of Technology Delft, Leiden University},
+                  year         = 2012,
+                  address      = {},
+                  month        = 11,
+                  note         = {Research assignment for Master Computer-science},
+                  url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
--- a/scan-for-segments.xml	Thu Mar 31 04:26:12 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-<tool id="scan_for_segments" name="scan-for-segments" version="1.6.3-1">
-    <description>Scan for the presence of segments in sequences using segmentation-fold</description>
-    
-    <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
-        <requirement type="package" version="2.7.10">python</requirement>
-    </requirements>
-    
-    <stdio></stdio>
-    
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
-    
-    <command><![CDATA[
-        scan-for-segments
-            -T \${GALAXY_SLOTS:-4}
-            -x
-            #if str($parameters.use_custom_xml) == "true"
-                "${parameters.input_xml}"
-            #else
-                "\$SEGMENTATION_FOLD_DEFAULT_XML"
-            #end if
-            -p "in-depth"
-            "${input_fasta}"
-            
-            >   $output_list
-    ]]></command>
-
-    <inputs>
-        <param name="input_fasta"
-               type="data"
-               format="fasta"
-               argument="-f"
-               label="Fasta file with RNA-sequece" />
-
-        <conditional name="parameters">
-            <param name="use_custom_xml"
-                type="boolean"
-                truevalue="true"
-                falsevalue="false"
-                selected="false"
-                label="Use segment definition from history" />
-
-            <when value="false" />
-            <when value="true">
-                <param name="input_xml"
-                    type="data"
-                    format="xml"
-                    multiple="false"
-                    argument="-x"
-                    label="Custom 'segments.xml'-syntaxed file" />
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data format="text" name="output_list" label="${tool.name} on ${str($input_fasta.hid) + ': ' + $input_fasta.name}" />
-    </outputs>
-
-    <tests>
-        <test>
-            <param name="input_fasta" value="SNORD13-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD48-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD114-4-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" />
-        </test>
-        <test>
-            <param name="input_fasta" value="SNORD118-revised.fa" ftype="fasta" />
-            <param name="use_custom_xml" value="false" ftype="fasta" />
-            
-            <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" />
-        </test>
-    </tests>
-    
-    <help><![CDATA[
-This is an utility of the segmentation-fold package that allows to scan for the presence of certain segments.
-If present, it will also scan for the Gibbs free energy necessairy the segment has to provide to contribute to the optimal structure.
-    ]]></help>
-    
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
-</tool>
--- a/segmentation-fold.xml	Thu Mar 31 04:26:12 2016 -0400
+++ b/segmentation-fold.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -1,13 +1,16 @@
-<tool id="segmentation_fold" name="segmentation-fold" version="1.6.3-1">
+<tool id="segmentation_fold" name="segmentation-fold" version="@VERSION@-1">
     <description>RNA-Folding including predefined segments including K-turns</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     
     <requirements>
-        <requirement type="package" version="1.6.3">segmentation-fold</requirement>
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
     </requirements>
     
-    <stdio></stdio>
+    <expand macro="stdio" />
     
-    <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command>
+    <version_command>@VERSION_COMMAND_SMF@</version_command>
     
     <command><![CDATA[
         segmentation-fold
@@ -234,18 +237,5 @@
 Youri Hoogstrate (yhoogstrate @ github)
     ]]></help>
     
-    <citations>
-        <citation type="bibtex">
-           @mastersthesis{mastersthesis,
-              author       = {Youri Hoogstrate}, 
-              title        = {An algorithm for predicting RNA 2D structures including K-turns},
-              school       = {University of Technology Delft, Leiden University},
-              year         = 2012,
-              address      = {},
-              month        = 11,
-              note         = {Research assignment for Master Computer-science},
-              url          = { https://yh-kt-fold.googlecode.com/files/Report.pdf }
-            }
-        </citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
Binary file test-data/DBNFile.test_01.in.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_01.in.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,8 @@
+>chr1:10-21 x unknown-01
+GGGGAAACCCC
+((((...))))	((.((.)).))	-2.5
+((.((.)).))	(((((.)))))	-3.5
+>chr1:25-36 x unknown-01
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01
+AAAAAAAAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.in.bed	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,6 @@
+chr1	0	1	firstbase	0	+
+chr1	0	5	1-2-3-4-5	0	+
+chr1	5	10	6-7-8-9-10	0	+
+chr1	10	11	hideme	0	+
+chr2	0	5	hideme2	0	+
+chr2	5	10	hideme3	0	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.in.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,6 @@
+>chr1:0-10 x unknown-01
+AAAAAAAAAA
+>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1)
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2)
+AAAAAAAAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.out.n.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,4 @@
+>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1)
+AAAAAAAAAAA
+>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2)
+AAAAAAAAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_02.out.o.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,2 @@
+>chr1:0-10 x unknown-01 (overlap in tests/test-data/DBNFile.test_02.in.bed: firstbase,1-2-3-4-5,6-7-8-9-10)
+AAAAAAAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.in.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,42 @@
+>chr3.rna:5-35(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna:5-35(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........	.((...(((((...........))))))).	-13.125
+>chr3.rna:5-80(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:5-80(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+>chr3.rna:50-80(+) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........	.((...(((((....(....).))))))).	-13.125
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+...((............(((((.((.((((.....)))).))))))).((((((...........))))))))	...((..((((((............))...)))).(((......))).((((((...........))))))))	0.0
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))......	..(((..((((((............))...))))...........(((((((((...........)))))))))...))).......	0.0
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........	.((...(((((....(....).))))))).	-13.125
+>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........	.((...(((((...........))))))).	-13.125
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.out.l.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,14 @@
+>chr3.rna:5-80(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+...((............(((((.((.((((.....)))).))))))).((((((...........))))))))	...((..((((((............))...)))).(((......))).((((((...........))))))))	0.0
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))......	..(((..((((((............))...))))...........(((((((((...........)))))))))...))).......	0.0
+>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
+(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))	(((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))	0.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/DBNFile.test_03.out.s.dbn	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,28 @@
+>chr3.rna:5-35(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna:5-35(+) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........	.((...(((((...........))))))).	-13.125
+>chr3.rna:5-80(+) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna:50-80(+) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........	.((...(((((....(....).))))))).	-13.125
+>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG
+>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU
+GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT
+>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU
+ACTTGTGATGAAACACTCATGGTCTGAAGA
+..(..(((((...)).)))..)........	.((...(((((....(....).))))))).	-13.125
+>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA
+>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU
+TCTTGTGATGAGAAGTACTGGATCTGAAGT
+(((.(((........))).)))........	.((...(((((...........))))))).	-13.125
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.in.bed	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,6 @@
+chr10	0	7	box1-f:NRTGATG	0	+
+chr10	14	18	box2-f:CTGA	0	+
+chr10	28	35	box1-f:NRTGATG	0	+
+chr10	42	46	box2-f:CTGA	0	+
+chr10	56	63	box1-f:NRTGATG	0	+
+chr10	70	74	box2-f:CTGA	0	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.in.fa	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,12 @@
+>chr10
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ExtractBoxedSequences.test_01.out.fa	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,12 @@
+>chr10:0-18(+)
+AATGATGaaaaaaaCTGA
+>chr10:0-46(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:0-74(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:28-46(+)
+AATGATGaaaaaaaCTGA
+>chr10:28-74(+)
+AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA
+>chr10:56-74(+)
+AATGATGaaaaaaaCTGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FindBoxes.genome.fa	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,44 @@
+>chr1
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+>chr2
+aaaaaCTGAaaaaaaaCTGAaaaaa
+>chr3
+CTGAaaaaaaaCTGA
+>chr4
+CTGACTGA
+>chr5
+TCAGaaaaaaTCAG
+>chr6
+tcagAAAAAAtcag
+>chr7
+AATGATG
+CATGATG
+TATGATG
+GATGATG
+AGTGATG
+CGTGATG
+TGTGATG
+GGTGATG
+>chr8_no_valid_C_boxes
+ACTGATG
+CCTGATG
+TCTGATG
+GCTGATG
+ATTGATG
+CTTGATG
+TTTGATG
+GTTGATG
+>chr9
+CATCACCCATCACACATCACGCATCACTCATCATCCATCATACATCATGCATCATT
+>chr10
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
+ccccccc
+AATGATG
+aaaaaaa
+CTGAaaa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FindBoxes.test_02.bed	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,36 @@
+chr2	5	9	box2-f:CTGA	0	+
+chr2	16	20	box2-f:CTGA	0	+
+chr3	0	4	box2-f:CTGA	0	+
+chr3	11	15	box2-f:CTGA	0	+
+chr4	0	4	box2-f:CTGA	0	+
+chr4	4	8	box2-f:CTGA	0	+
+chr5	0	4	box2-r:TCAG	0	-
+chr5	10	14	box2-r:TCAG	0	-
+chr6	0	4	box2-r:TCAG	0	-
+chr6	10	14	box2-r:TCAG	0	-
+chr7	0	7	box1-f:NRTGATG	0	+
+chr7	7	14	box1-f:NRTGATG	0	+
+chr7	14	21	box1-f:NRTGATG	0	+
+chr7	21	28	box1-f:NRTGATG	0	+
+chr7	28	35	box1-f:NRTGATG	0	+
+chr7	35	42	box1-f:NRTGATG	0	+
+chr7	42	49	box1-f:NRTGATG	0	+
+chr7	49	56	box1-f:NRTGATG	0	+
+chr8_no_valid_C_boxes	1	5	box2-f:CTGA	0	+
+chr8_no_valid_C_boxes	8	12	box2-f:CTGA	0	+
+chr8_no_valid_C_boxes	15	19	box2-f:CTGA	0	+
+chr8_no_valid_C_boxes	22	26	box2-f:CTGA	0	+
+chr9	0	7	box1-r:CATCAYN	0	-
+chr9	7	14	box1-r:CATCAYN	0	-
+chr9	14	21	box1-r:CATCAYN	0	-
+chr9	21	28	box1-r:CATCAYN	0	-
+chr9	28	35	box1-r:CATCAYN	0	-
+chr9	35	42	box1-r:CATCAYN	0	-
+chr9	42	49	box1-r:CATCAYN	0	-
+chr9	49	56	box1-r:CATCAYN	0	-
+chr10	0	7	box1-f:NRTGATG	0	+
+chr10	14	18	box2-f:CTGA	0	+
+chr10	28	35	box1-f:NRTGATG	0	+
+chr10	42	46	box2-f:CTGA	0	+
+chr10	56	63	box1-f:NRTGATG	0	+
+chr10	70	74	box2-f:CTGA	0	+
--- a/test-data/SNORD114-4-revised.scan-for-segments.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD114-4-revised.scan-for-segments.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,74 +1,57 @@
->SNORD114-4 x Kt-42.dra
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.eco
+>SNORD114-4 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Loop-E-Motif.bac
+>SNORD114-4 revised x Kt-7 (T. thermophilus)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.UAU
+>SNORD114-4 revised x Kt-7 (E. coli)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.UGU
+>SNORD114-4 revised x Kt-7 (D. radiodurans)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.CAU
+>SNORD114-4 revised x Kt-11 (T. thermophilus)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.tth
+>SNORD114-4 revised x Kt-11.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD114-4 revised x Kt-15.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-15.hma
+.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..)))))	.((((((....(((.((.....((((......))))((....))...)...))))...(((....))).))))))	-6.42471313477
+>SNORD114-4 revised x Kt-23.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
-.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..)))))	.((((((....(((.((.....((((......))))((....))...)...))))...(((....))).))))))	-6.4197063446
->SNORD114-4 x Kt-U4b.hsa
+>SNORD114-4 revised x Kt-23.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.GGU
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (E. coli)
+>SNORD114-4 revised x Kt-38.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (D. radiodurans)
+>SNORD114-4 revised x Kt-42.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-58.hma
+>SNORD114-4 revised x Kt-42.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.tth
+>SNORD114-4 revised x Kt-42.dra
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-SAM-ribo.tte
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-23.eco
+>SNORD114-4 revised x Kt-42.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-38.hma
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.dra
+>SNORD114-4 revised x Kt-46.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.CGU
+>SNORD114-4 revised x Kt-46.tth
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-T-box.bsu
+>SNORD114-4 revised x Kt-46.dra
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-23.tth
+>SNORD114-4 revised x Kt-46.eco
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.eco
+>SNORD114-4 revised x Kt-58.hma
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-c-di-GMP-II.cac
+>SNORD114-4 revised x Kt-U4a.hsa
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-11 (T. thermophilus)
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.AAU
+>SNORD114-4 revised x Kt-U4b.hsa
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-L30e.sce
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-7 (T. thermophilus)
+>SNORD114-4 revised x Kt-CD-box.CGU
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-42.hma
+>SNORD114-4 revised x Kt-CD-box.UGU
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-U4a.hsa
-CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-G2nA-SAMribo.bsu
+>SNORD114-4 revised x Kt-L30e.sce
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.GAU
+>SNORD114-4 revised x Kt-SAM-ribo.tte
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-11.eco
+>SNORD114-4 revised x Kt-T-box.bsu
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-46.hma
+>SNORD114-4 revised x Kt-c-di-GMP-II.cac
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
->SNORD114-4 x Kt-CD-box.AGU
+>SNORD114-4 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA
-.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..)))))	.((((((...(((((.......((((......)))).((((((..........)))))).....)))))))))))	1.99971199036
--- a/test-data/SNORD118-revised.scan-for-segments.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD118-revised.scan-for-segments.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,75 +1,57 @@
->SNORD118 x Kt-42.dra
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-42.eco
+>SNORD118 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Loop-E-Motif.bac
+>SNORD118 revised x Kt-7 (T. thermophilus)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.UAU
+>SNORD118 revised x Kt-7 (E. coli)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.UGU
+>SNORD118 revised x Kt-7 (D. radiodurans)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.CAU
+>SNORD118 revised x Kt-11 (T. thermophilus)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).)))))	.(((...((((((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).))))))))).	3.380651474
->SNORD118 x Kt-42.tth
+>SNORD118 revised x Kt-11.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
+>SNORD118 revised x Kt-15.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 G2nA SAM riboswitch (H. marismortui)
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-15.hma
+>SNORD118 revised x Kt-23.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-U4b.hsa
+>SNORD118 revised x Kt-23.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.GGU
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (E. coli)
+>SNORD118 revised x Kt-38.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (D. radiodurans)
+>SNORD118 revised x Kt-42.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-58.hma
+>SNORD118 revised x Kt-42.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.tth
+>SNORD118 revised x Kt-42.dra
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-SAM-ribo.tte
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-23.eco
+>SNORD118 revised x Kt-42.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-38.hma
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.dra
+>SNORD118 revised x Kt-46.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.CGU
+>SNORD118 revised x Kt-46.tth
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).)))))	(((((...((((((............))...))))...))......(((((((((...........))))))))).....)))......	-15.021024704
->SNORD118 x Kt-T-box.bsu
+>SNORD118 revised x Kt-46.dra
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-23.tth
+>SNORD118 revised x Kt-46.eco
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.eco
+>SNORD118 revised x Kt-58.hma
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-c-di-GMP-II.cac
+>SNORD118 revised x Kt-U4a.hsa
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-11 (T. thermophilus)
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.AAU
+>SNORD118 revised x Kt-U4b.hsa
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-L30e.sce
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-7 (T. thermophilus)
+>SNORD118 revised x Kt-CD-box.CGU
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-42.hma
+.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).)))))	(((((...((((((............))...))))...))......(((((((((...........))))))))).....)))......	-15.038848877
+>SNORD118 revised x Kt-CD-box.UGU
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-U4a.hsa
-AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-G2nA-SAMribo.bsu
+>SNORD118 revised x Kt-L30e.sce
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.GAU
+>SNORD118 revised x Kt-SAM-ribo.tte
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
-.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).)))))	(((.(((...(((((......))))))((((.....))))...)).(((((((((...........))))))))).....)))......	-8.30012321472
->SNORD118 x Kt-11.eco
+>SNORD118 revised x Kt-T-box.bsu
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-46.hma
+>SNORD118 revised x Kt-c-di-GMP-II.cac
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
->SNORD118 x Kt-CD-box.AGU
+>SNORD118 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
--- a/test-data/SNORD13-revised.scan-for-segments.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD13-revised.scan-for-segments.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,74 +1,57 @@
->SNORD13 x Kt-42.dra
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.eco
+>SNORD13 revised x Kt-7 G2nA SAM riboswitch (H. marismortui)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Loop-E-Motif.bac
+>SNORD13 revised x Kt-7 (T. thermophilus)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.UAU
+>SNORD13 revised x Kt-7 (E. coli)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.UGU
+>SNORD13 revised x Kt-7 (D. radiodurans)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.CAU
+>SNORD13 revised x Kt-11 (T. thermophilus)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.tth
+>SNORD13 revised x Kt-11.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD13 revised x Kt-15.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-15.hma
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-U4b.hsa
+>SNORD13 revised x Kt-23.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.GGU
+>SNORD13 revised x Kt-23.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (E. coli)
+>SNORD13 revised x Kt-38.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (D. radiodurans)
+>SNORD13 revised x Kt-42.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-58.hma
+>SNORD13 revised x Kt-42.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.tth
+>SNORD13 revised x Kt-42.dra
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-SAM-ribo.tte
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-23.eco
+>SNORD13 revised x Kt-42.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-38.hma
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.dra
+>SNORD13 revised x Kt-46.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.CGU
+>SNORD13 revised x Kt-46.tth
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
-(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..))))))	(((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))))))))))	3.35026359558
->SNORD13 x Kt-T-box.bsu
+>SNORD13 revised x Kt-46.dra
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-23.tth
+>SNORD13 revised x Kt-46.eco
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.eco
+>SNORD13 revised x Kt-58.hma
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-c-di-GMP-II.cac
+>SNORD13 revised x Kt-U4a.hsa
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-11 (T. thermophilus)
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.AAU
+>SNORD13 revised x Kt-U4b.hsa
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-L30e.sce
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-7 (T. thermophilus)
+>SNORD13 revised x Kt-CD-box.CGU
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-42.hma
+(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..))))))	(((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))))))))))	3.37054443359
+>SNORD13 revised x Kt-CD-box.UGU
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-U4a.hsa
-GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-G2nA-SAMribo.bsu
+>SNORD13 revised x Kt-L30e.sce
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.GAU
+>SNORD13 revised x Kt-SAM-ribo.tte
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
-(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..))))))	(((((((.((((...((((((((((....))))))..)))))...)))))............((((.((((.((........)))))))))).)))))	-9.91994667053
->SNORD13 x Kt-11.eco
+>SNORD13 revised x Kt-T-box.bsu
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-46.hma
+>SNORD13 revised x Kt-c-di-GMP-II.cac
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
->SNORD13 x Kt-CD-box.AGU
+>SNORD13 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
--- a/test-data/SNORD48-revised.scan-for-segments.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/SNORD48-revised.scan-for-segments.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,74 +1,56 @@
->SNORD48 x Kt-42.dra
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 G2nA SAM riboswitch (H. marismortui)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Loop-E-Motif.bac
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (T. thermophilus)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.UAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (E. coli)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.UGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (D. radiodurans)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.CAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-11 (T. thermophilus)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-11.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 G2nA SAM riboswitch (H. marismortui)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-15.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-15.hma
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-U4b.hsa
+>SNORD48 revised (2 extra bases on 5' end) x Kt-23.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.GGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-23.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (E. coli)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-38.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (D. radiodurans)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-58.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.dra
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-SAM-ribo.tte
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-23.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-42.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-38.hma
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.dra
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.CGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.tth
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-T-box.bsu
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.dra
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-23.tth
+>SNORD48 revised (2 extra bases on 5' end) x Kt-46.eco
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-58.hma
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-c-di-GMP-II.cac
+>SNORD48 revised (2 extra bases on 5' end) x Kt-U4a.hsa
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-11 (T. thermophilus)
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.AAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-U4b.hsa
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-L30e.sce
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-7 (T. thermophilus)
+>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.CGU
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-42.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.UGU
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-U4a.hsa
-GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-G2nA-SAMribo.bsu
+>SNORD48 revised (2 extra bases on 5' end) x Kt-L30e.sce
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.GAU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-SAM-ribo.tte
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
-((((((.((.((((((((((.....)))).).).)))).((((...))))...)).))))))....	(((((...(((((((...))).....))))).....(((((............)))))))))....	-10.9206504822
->SNORD48 x Kt-11.eco
+>SNORD48 revised (2 extra bases on 5' end) x Kt-T-box.bsu
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-46.hma
+>SNORD48 revised (2 extra bases on 5' end) x Kt-c-di-GMP-II.cac
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
->SNORD48 x Kt-CD-box.AGU
+>SNORD48 revised (2 extra bases on 5' end) x Kt-G2nA-SAM-riboswitch (T. tengcongensi)
 GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC
-((((((.((.((((((((((.....)))).).).)))).((((...))))...)).))))))....	((...(((((..((..((((.....)))).))((((.((((((...))))..)).)))))))))))	-1.84962844849
--- a/test-data/segments_truncated.2.out.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/segments_truncated.2.out.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,5 +1,7 @@
->C/D-box snoRNA (shuffle iteration 1)
-agaggCGUGAUcccaacgUGAuggc
-....((((.......))))......	....(...((((.....)))))...	-9.3900680542
->Artificial double C/D K-turn construct (shuffle iteration 1)
-uguucugucacggcacauaccuccggUGUGAUggUGAauaguaUGAgaaguaucgugugucagaggcccuaaUGUGAUgccuuaa
+>C/D-box snoRNA x Kt-CD-box.CGU
+GCUCUGACCGAAAGGCGUGAUGAGC
+((((....((.....))....))))	(((((((((....))...)))))))	2.49938964844
+>Artificial double C/D K-turn construct x Kt-CD-box.UGU
+GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))))))))	((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))))))))	-1.26037597656
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))))))))	((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))))))))	-1.26037597656
--- a/test-data/segments_truncated.out.txt	Thu Mar 31 04:26:12 2016 -0400
+++ b/test-data/segments_truncated.out.txt	Thu Jul 28 10:25:37 2016 -0400
@@ -1,6 +1,7 @@
->C/D-box snoRNA
+>C/D-box snoRNA x Kt-CD-box.CGU
 GCUCUGACCGAAAGGCGUGAUGAGC
-((((....((.....))....))))	(((((((((....))...)))))))	2.50072479248
->Artificial double C/D K-turn construct
+((((....((.....))....))))	(((((((((....))...)))))))	2.49938964844
+>Artificial double C/D K-turn construct x Kt-CD-box.UGU
 GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC
-((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))))))))	((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))))))))	-1.25102996826
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))))))))	((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))))))))	-1.26037597656
+((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))))))))	((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))))))))	-1.26037597656
--- a/tool_dependencies.xml	Thu Mar 31 04:26:12 2016 -0400
+++ b/tool_dependencies.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -1,10 +1,24 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="segmentation-fold" version="1.6.3">
-        <repository changeset_revision="f4981e860e2c" name="package_segmentation_fold_1_6_3" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="segmentation-fold" version="1.6.5">
+        <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     
+    <!-- Python interpreter and Python packages that the utils_* tool wrappers list as requirements -->
     <package name="python" version="2.7.10">
         <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
+    <package name="numpy" version="1.9">
+        <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="pysam" version="0.8.2.1">
+        <repository changeset_revision="f8fecf1f6eba" name="package_pysam_0_8_2" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="htseq" version="0.6.1">
+        <repository changeset_revision="fbb72996807d" name="package_htseq_0_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <!-- Resolved from the same Tool Shed repository and changeset as the segmentation-fold package above -->
+    <package name="segmentation-fold-utils" version="2.0.1">
+        <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
 </tool_dependency>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_add-read-counts.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,90 @@
+<tool id="smf_utils_add-read-counts" name="add-read-counts" version="@VERSION@-3">
+    <description>Annotate sequences by adding the read counts from a bam file, within a region contained in the fasta header of the dbn file</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        ## Expose the BAM index next to the alignment under the BAM path plus .bai (presumably where the reader looks for it)
+        ln -f -s '${bam_input_file.metadata.bam_index}' '${bam_input_file}.bai' &&
+        
+        segmentation-fold-utils
+            add-read-counts
+                ## A quote cannot be backslash-escaped inside a single-quoted shell word: close the word, emit an escaped quote, reopen
+                --regex '${regex.replace("'", "'\\''")}'
+                '$dbn_input_file'
+                '$bam_input_file'
+                '$dbn_output_file'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt,text"
+               label="Input DBN file"
+               help="The 'fasta'-headers should contain the genomic position being used to find overlapping reads in the BAM file"/>
+        <param name="bam_input_file"
+               type="data"
+               format="bam"
+               label="Input BAM file"/>
+        <param name="regex"
+               type="text"
+               argument="--regex"
+               value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'
+               label="Regex to capture the targeted location in DBN file"
+               help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]):([0-9]+)-([0-9]+)'">
+            <!-- Galaxy's default text sanitizer rewrites '>', '[' and ']' (e.g. to __gt__), which would corrupt
+                 the pattern; allow all printable characters here. The command block single-quotes the value.
+                 NOTE(review): 'chr[^:]' matches one character after 'chr', so 'chr10' cannot match; confirm intent. -->
+            <sanitizer>
+                <valid initial="string.printable" />
+            </sanitizer>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name}"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_01.in.dbn" ftype="dbn"/>
+            <param name="bam_input_file" value="DBNFile.test_01.in.bam" ftype="bam"/>
+            <param name="regex" value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'/>
+
+            <output name="dbn_output_file">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:10-21 x unknown-01 \(aligned reads .*?: 20\)"/>
+                    <has_line line="GGGGAAACCCC"/>
+                    <has_line line="((((...))))&#009;((.((.)).))&#009;-2.5"/>
+                    <has_line line="((.((.)).))&#009;(((((.)))))&#009;-3.5"/>
+                    
+                    <has_line_matching expression="&gt;chr1:25-36 x unknown-01 \(aligned reads.*?: 1\)"/>
+                    <has_line line="AAAAAAAAAAA"/>
+                    
+                    <has_line_matching expression="&gt;chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+This is a utility of the segmentation-fold package
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_estimate-energy.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,146 @@
+<tool id="smf_utils_estimate-energy" name="estimate-energy" version="@VERSION@-2">
+    <description>Estimates whether a certain Segment(Loop) is present and for which delta-G this transition takes place</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+
+        <requirement type="package" version="1.6.5">segmentation-fold</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>echo $(@VERSION_COMMAND_SMF@)", "$(@VERSION_COMMAND_UTILS@)</version_command>
+    
+    <command><![CDATA[
+        segmentation-fold-utils
+            estimate-energy
+                -T \${GALAXY_SLOTS:-2}
+                ## Segment definitions: the history dataset when requested, otherwise the path held in SEGMENTATION_FOLD_DEFAULT_XML
+                -x
+                #if str($parameters.use_custom_xml) == "true"
+                    "${parameters.input_xml}"
+                #else
+                    "\$SEGMENTATION_FOLD_DEFAULT_XML"
+                #end if
+                
+                -p $precision
+                -r $randomize
+                
+                ## Only pass a FASTA file when the optional dataset was supplied
+                #if $sequences_from_fasta_file:
+                    --sequences-from-fasta-file "${sequences_from_fasta_file}"
+                #end if
+                
+                $output_list
+    ]]></command>
+
+    <inputs>
+        <conditional name="parameters">
+            <param name="use_custom_xml"
+                type="boolean"
+                truevalue="true"
+                falsevalue="false"
+                checked="false"
+                label="Use segment definition from history" />
+
+            <when value="false" />
+            <when value="true">
+                <param name="input_xml"
+                    type="data"
+                    format="xml"
+                    multiple="false"
+                    argument="-x"
+                    label="Custom 'segments.xml'-syntaxed file" />
+            </when>
+        </conditional>
+        
+        <param name="precision"
+               type="float"
+               value="0.05"
+               min="0"
+               argument="--precision"
+               label="Precision"
+               help="Minimal difference for binary split - the smaller this value the slower. if this value equals 0, the difference is set to infinity (default: 0.05)" />
+
+        <param name="randomize"
+               type="integer"
+               value="0"
+               min="0"
+               argument="--randomize"
+               label="Shuffle each sequence this many times and predict energy of shuffled sequence(s) (default: 0, 0 means disabled)" />
+        
+        <param name="sequences_from_fasta_file"
+               type="data"
+               format="fasta"
+               multiple="false"
+               optional="true"
+               argument="--sequences-from-fasta-file"
+               label="Optional sequences to scan for Segment(Loop)s (FASTA)"
+               help="Use sequences from a FASTA file instead of the XML file that contains the segments. In XML files you can explicitly link one Segment(Loop) to one particular sequence instead of doing n*n comparisons (default: None)" />
+    </inputs>
+
+    <outputs>
+        <data format="dbn" name="output_list" label="${tool.name}" />
+    </outputs>
+
+    <tests>
+        <!-- xml * fasta mode -->
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD13-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD48-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD114-4-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" />
+        </test>
+        <test>
+            <param name="use_custom_xml" value="false" />
+            <param name="sequences_from_fasta_file" value="SNORD118-revised.fa" ftype="fasta" />
+            
+            <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" />
+        </test>
+        
+        
+        <!-- xml * xml mode -->
+        <test>
+            <param  name="use_custom_xml" value="true" />
+            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
+            <!-- 'randomize' is the only shuffle-related input of this tool; 0 leaves shuffling disabled -->
+            <param  name="randomize" value="0" />
+            
+            <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" />
+        </test>
+        <test>
+            <param  name="use_custom_xml" value="true" />
+            <param  name="input_xml" value="segments_truncated.xml" ftype="xml" />
+            <!-- NOTE(review): the former 'shuffle_n_times' = 1 named no existing input; raise 'randomize' only after regenerating the fixture -->
+            <param  name="randomize" value="0" />
+            
+            <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+This is a utility of the segmentation-fold package that allows to scan for the presence of certain segments.
+If present, it will also scan for the Gibbs free energy necessary the segment has to provide to contribute to the optimal structure.
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_extract-boxed-sequences.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,114 @@
+<tool id="smf_utils_extract-boxed-sequences" name="extract-boxed-sequences" version="@VERSION@-1">
+    <description>Extracts boxed sequences from bed_input_file which has to be created with 'find-boxes', part of this utility</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        ## Extract the sequences delimited by the boxes listed in the BED file.
+        segmentation-fold-utils extract-boxed-sequences
+            ## options
+            --max-inner-dist ${max_inner_dist}
+            --bp-extension ${bp_extension}
+
+            ## positional arguments: reference FASTA, boxes BED, output FASTA
+            '$fasta_input_file'
+            '$bed_input_file'
+            '$fasta_output_file'
+    ]]></command>
+
+    <inputs>
+        <param name="fasta_input_file"
+               type="data"
+               format="fasta"
+               label="Genomic reference FASTA file"/>
+        <param name="bed_input_file"
+               type="data"
+               format="bed"
+               label="BED file containing the sequence boxes"
+               help="This file should have been created with 'find-boxes'"/>
+        <!-- The 'argument' attribute names the command line flag each of the two integer parameters is passed with -->
+        <param name="max_inner_dist"
+               argument="--max-inner-dist"
+               type="integer"
+               min="0" value="250"
+               label="Maximal distance between the boxes"
+               help="The boxes themselves are excluded from this distance (default: 250bp)"/>
+        <param name="bp_extension"
+               argument="--bp-extension"
+               type="integer"
+               min="0" value="10"
+               label="Extend extracted sequences with this number of bases"
+               help="(default: 10bp)"/>
+    </inputs>
+
+    <outputs>
+        <data name="fasta_output_file"
+              format="fasta"
+              label="${tool.name} on ${fasta_input_file.hid}: ${fasta_input_file.name}"/>
+    </outputs>
+
+    <tests>
+        <!-- Single regression test: inner distance limited to 100, no flanking extension -->
+        <test>
+            <param name="fasta_input_file" ftype="fasta" value="ExtractBoxedSequences.test_01.in.fa" />
+            <param name="bed_input_file" ftype="bed" value="ExtractBoxedSequences.test_01.in.bed" />
+            <param name="max_inner_dist" value="100" />
+            <param name="bp_extension" value="0" />
+            <output name="fasta_output_file" file="ExtractBoxedSequences.test_01.out.fa" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+extract-boxed-sequences
+-----------------------
+*Extracts boxed sequences from `bed_input_file` which has to be created with 'find-boxes', also part of this utility*
+
+The user can use this utility to extract sequences containing the boxes provided in the bed file by `find-boxes`.
+
+**input**
+
+Important information about the input:
+
+ - `FASTA_INPUT_FILE` can be any generic FASTA file that can be read with pysam. This means that if the sequence is split into multiple lines, they must all be at the same length.
+ - `BED_INPUT_FILE` the bed file should be provided by `find-boxes` as it properly denotes the names (box1-f, box1-r, box2-f and box2-r) which are used for extraction.
+ - `-d, --max-inner-dist INTEGER` Only sequences for which the distance in bases between the boxes is smaller than this distance, will be extracted. Boxes are excluded from this distance.
+ - `-e, --bp-extension INTEGER` Each sequence will be extended with:
+  * The boxes
+  * An optional number of bases provided with this argument
+
+**output**
+
+Be aware that there can be overlapping sequences. For example, if you started box1=`TTTT` and box2=`CCCC` with the following sequence, you will extract 2 sequences:
+
+```>seq
+gagagaTTTTgagagaTTTTgagagagagagagagaCCCCgaga
+```
+
+Namely:
+
+```TTTTgagagaTTTTgagagagagagagagaCCCC
+```
+
+and
+
+```          TTTTgagagagagagagagaCCCC
+```
+
+This is an utility of the segmentation-fold package
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_filter-annotated-entries.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,84 @@
+<tool id="smf_utils_filter-annotated-entries" name="filter-annotated-entries" version="@VERSION@-1">
+    <description>Split entries into two files based on whether they overlap annotations in a bed file</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        ## a ' cannot be backslash-escaped inside a single-quoted shell word: emit '\'' (close, escaped quote, reopen) instead
+        segmentation-fold-utils filter-annotated-entries
+                --regex '${regex.replace("'", "'\\''")}'
+                '$dbn_input_file'
+                '$bed_input_file'
+                '$dbn_output_file_overlapping'
+                '$dbn_output_file_non_overlapping'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt"
+               label="Input DBN file"
+               help="The 'fasta'-headers should contain the genomic position being used to find overlapping annotations in the BED file"/>
+        <param name="bed_input_file"
+               type="data"
+               format="bed"
+               label="The results will be filtered based on overlap with annotations in this BED file"/>
+        <!-- chr[^:]+ (not chr[^:]) so that names with more than one character after 'chr', such as chr10, are captured -->
+        <param name="regex"
+               type="text" argument="--regex"
+               value='>.*?(chr[^:]+):([0-9]+)-([0-9]+)'
+               label="Regex to capture the targeted location in DBN file"
+               help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]+):([0-9]+)-([0-9]+)'" />
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file_overlapping"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - overlapping entries"/>
+        <data name="dbn_output_file_non_overlapping"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - non overlapping entries"/>
+    </outputs>
+    
+    <tests>
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_02.in.dbn" ftype="dbn"/>
+            <param name="bed_input_file" value="DBNFile.test_02.in.bed" ftype="bed"/>
+            <param name="regex" value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'/>
+
+            <output name="dbn_output_file_overlapping">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:0-10 x unknown-01 \(overlap in .*?: firstbase,1-2-3-4-5,6-7-8-9-10\)"/>
+                    <has_line line="AAAAAAAAAA"/>
+                </assert_contents>
+            </output>
+            <output name="dbn_output_file_non_overlapping">
+                <assert_contents>
+                    <has_line_matching expression="&gt;chr1:25-36 x unknown-01 \(aligned reads .*?: 1\)"/>
+                    <has_line line="AAAAAAAAAAA"/>
+                    
+                    <has_line_matching expression="&gt;chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Filter based on whether the entries in the DBN file are already annotated or not
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_filter-by-energy.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,66 @@
+<tool id="smf_utils_filter-by-energy" name="filter-by-energy" version="@VERSION@-1">
+    <description>Split entries over two files based on the estimated energy</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        segmentation-fold-utils filter-by-energy
+            --energy $energy
+            ## positional arguments: input DBN, then the two output DBN files
+            '${dbn_input_file}'
+            '${dbn_output_file_larger_or_equal}'
+            '${dbn_output_file_smaller}'
+    ]]></command>
+
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt"
+               label="Input DBN file"
+               help="DBN file with entries annotated with transition energies; the entries are split according to the energy value below"/>
+        <!-- NOTE(review): the help below pairs 'smaller' with LARGER_OR_EQUAL and 'larger or equal' with SMALLER, contradicting the output names; confirm against segmentation-fold-utils before rewording -->
+        <param name="energy"
+               type="float"
+               value="0.0" argument="--energy"
+               label="Energy value"
+               help="Entries with transitions with energy smaller than energy (&lt; e) or without transitions will be put into DBN_OUTPUT_FILE_LARGER_OR_EQUAL and those larger or equal (&gt;= e) to DBN_OUTPUT_FILE_SMALLER" />
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file_larger_or_equal"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy larger/equal than selected"/>
+        <data name="dbn_output_file_smaller"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy smaller than selected"/>
+    </outputs>
+    
+    <tests>
+        <!-- Energy value 0.0: both output files are compared against reference files -->
+        <test>
+            <param name="dbn_input_file" ftype="dbn" value="DBNFile.test_03.in.dbn" />
+            <param name="energy" value="0.0" />
+            <output name="dbn_output_file_larger_or_equal" file="DBNFile.test_03.out.l.dbn" />
+            <output name="dbn_output_file_smaller" file="DBNFile.test_03.out.s.dbn" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Splits the entries in the DBN file over two files based on their estimated energy relative to the chosen energy value
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_find-boxes.xml	Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,90 @@
+<tool id="smf_utils_find-boxes" name="find-boxes" version="@VERSION@-1">
+    <description>Finds all occurrences of two given boxes (sequence motifs) within a FASTA file</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    
+    <expand macro="stdio" />
+
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+    
+    <command><![CDATA[
+        segmentation-fold-utils find-boxes
+            --box1 '$box1'
+            --box2 '$box2'
+            ## each boolean parameter expands to its truevalue or falsevalue flag
+            ${forward}
+            ${reverse}
+            '$fasta_input'
+            '$bed_output'
+    ]]></command>
+
+    <inputs>
+        <param name="fasta_input"
+               type="data"
+               format="fasta"
+               argument="-f"
+               label="Fasta file with RNA-sequence" /> <!-- NOTE(review): the command section passes this file positionally; confirm that '-f' is a real flag of find-boxes -->
+
+        <param name="box1"
+               type="text"
+               value="NRUGAUG"
+               argument="--box1"
+               label="Sequence of box1 (default = C-box: 'NRUGAUG')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="box2"
+               type="text"
+               value="CUGA"
+               argument="--box2"
+               label="Sequence of box2 (default = D-box: 'CUGA')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="forward"
+               type="boolean"
+               truevalue="--forward"
+               falsevalue="--no-forward"
+               checked="true"
+               label="Search in the forward direction of the reference sequence" />
+
+        <param name="reverse"
+               type="boolean"
+               truevalue="--reverse"
+               falsevalue="--no-reverse"
+               checked="true"
+               label="Search in the reverse complement of the reference sequence" />
+    </inputs>
+
+    <outputs>
+        <!-- BED file with the located boxes; the label is "<tool name> on <hid>: <input name>" -->
+        <data name="bed_output" format="bed"
+              label="${tool.name} on ${fasta_input.hid}: ${fasta_input.name}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- 'ftype' (not 'format') is the test attribute that sets the datatype of the uploaded file -->
+            <param name="fasta_input" value="FindBoxes.genome.fa" ftype="fasta" />
+            <param name="box1" value="NRUGAUG" />
+            <param name="box2" value="CUGA" />
+            <param name="forward" value="true" />
+            <param name="reverse" value="true" />
+            <output name="bed_output" file="FindBoxes.test_02.bed" />
+        </test>
+    </tests>
+    
+    <help><![CDATA[
+Scans a FASTA reference for BOX motifs (like C- and D-box) and reports them in a BED file
+    ]]></help>
+    
+    <expand macro="citations" />
+</tool>