Mercurial > repos > yhoogstrate > segmentation_fold
changeset 4:63df1e23f4ff draft
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/segmentation_fold_galaxy_wrapper commit 00690c63c51a7f7563f2428c313d7fa75f2657e5-dirty
line wrap: on
line diff
--- a/energy-estimation-utility.xml Thu Mar 31 04:26:12 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,111 +0,0 @@ -<tool id="energy_estimation_utility" name="energy-estimation-utility" version="1.6.3-1"> - <description>Estimate the maximal energy a segment needs to assign to become part of the optimal structure using segmentation-fold</description> - - <requirements> - <requirement type="package" version="1.6.3">segmentation-fold</requirement> - <requirement type="package" version="2.7.10">python</requirement> - </requirements> - - <stdio></stdio> - - <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command> - - <command><![CDATA[ - energy-estimation-utility - #if $parameters.use_custom_xml == "true" - -x "${parameters.input_xml}" - #else - -x "\$SEGMENTATION_FOLD_DEFAULT_XML" - #end if - - -r ${randomization.shuffle_n_times} - > "${output_list}" - ]]></command> - - <inputs> - <conditional name="parameters"> - <param name="use_custom_xml" - type="boolean" - truevalue="true" - falsevalue="false" - selected="false" - label="Use segment definition from history" /> - - <when value="false" /> - <when value="true"> - <param name="input_xml" - type="data" - format="xml" - multiple="false" - argument="-x" - label="Custom 'segments.xml'-syntaxed file" /> - </when> - </conditional> - - <conditional name="randomization"> - <param name="do_randomization" - type="boolean" - truevalue="true" - falsevalue="false" - selected="false" - label="Randomly shuffle the sequence(s) instead" - help="This can be helpful in determining a baseline of observing an energy parameter by chance" /> - - <when value="false"> - <param name="shuffle_n_times" - type="hidden" - value="0"/> - </when> - <when value="true"> - <param name="shuffle_n_times" - type="integer" - min="0" - value="10" - argument="-r" - label="Number of times the sequences have to be shuffled and energy parameters have to be estimated 
on" /> - </when> - </conditional> - </inputs> - - <outputs> - <data format="text" name="output_list" label="${tool.name}" /> - </outputs> - - <tests> - <test> - <param name="use_custom_xml" value="true" /> - <param name="input_xml" value="segments_truncated.xml" ftype="xml" /> - <param name="do_randomization" value="false" /> - <param name="shuffle_n_times" value="0" /> - - <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" /><!-- Accept rounding errors by diff CPU's etc. --> - </test> - <test> - <param name="use_custom_xml" value="true" /> - <param name="input_xml" value="segments_truncated.xml" ftype="xml" /> - <param name="do_randomization" value="false" /> - <param name="shuffle_n_times" value="1" /> - - <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" /> - </test> - </tests> - - <help><![CDATA[ -The tool uses the sequences from the xml file. - ]]></help> - - <citations> - <citation type="bibtex"> - @mastersthesis{mastersthesis, - author = {Youri Hoogstrate}, - title = {An algorithm for predicting RNA 2D structures including K-turns}, - school = {University of Technology Delft, Leiden University}, - year = 2012, - address = {}, - month = 11, - note = {Research assignment for Master Computer-science}, - url = { https://yh-kt-fold.googlecode.com/files/Report.pdf } - } - </citation> - </citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,44 @@ +<macros> + <token name="@VERSION@">smf-v1.6-5_utils-v2.0.1</token> + + <xml name="stdio"> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> + </xml> + + <token name="@VERSION_COMMAND_SMF@">segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</token> + <token name="@VERSION_COMMAND_UTILS@">segmentation-fold-utils --version</token> + + + <token name="@REQUIREMENTS_UTILS@"><![CDATA[ + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="1.9">numpy</requirement> + <requirement type="package" version="0.8.2.1">pysam</requirement> + <requirement type="package" version="0.6.1">htseq</requirement> + <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement> + ]]></token> + + <token name="@REQUIREMENTS_SMF@"><![CDATA[ + <requirement type="package" version="1.6.5">segmentation-fold</requirement> + ]]></token> + + + <xml name="citations"> + <citations> + <citation type="bibtex"> + @mastersthesis{mastersthesis, + author = {Youri Hoogstrate}, + title = {An algorithm for predicting RNA 2D structures including K-turns}, + school = {University of Technology Delft, Leiden University}, + year = 2012, + address = {}, + month = 11, + note = {Research assignment for Master Computer-science}, + url = { https://yh-kt-fold.googlecode.com/files/Report.pdf } + } + </citation> + </citations> + </xml> +</macros>
--- a/scan-for-segments.xml Thu Mar 31 04:26:12 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -<tool id="scan_for_segments" name="scan-for-segments" version="1.6.3-1"> - <description>Scan for the presence of segments in sequences using segmentation-fold</description> - - <requirements> - <requirement type="package" version="1.6.3">segmentation-fold</requirement> - <requirement type="package" version="2.7.10">python</requirement> - </requirements> - - <stdio></stdio> - - <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command> - - <command><![CDATA[ - scan-for-segments - -T \${GALAXY_SLOTS:-4} - -x - #if str($parameters.use_custom_xml) == "true" - "${parameters.input_xml}" - #else - "\$SEGMENTATION_FOLD_DEFAULT_XML" - #end if - -p "in-depth" - "${input_fasta}" - - > $output_list - ]]></command> - - <inputs> - <param name="input_fasta" - type="data" - format="fasta" - argument="-f" - label="Fasta file with RNA-sequece" /> - - <conditional name="parameters"> - <param name="use_custom_xml" - type="boolean" - truevalue="true" - falsevalue="false" - selected="false" - label="Use segment definition from history" /> - - <when value="false" /> - <when value="true"> - <param name="input_xml" - type="data" - format="xml" - multiple="false" - argument="-x" - label="Custom 'segments.xml'-syntaxed file" /> - </when> - </conditional> - </inputs> - - <outputs> - <data format="text" name="output_list" label="${tool.name} on ${str($input_fasta.hid) + ': ' + $input_fasta.name}" /> - </outputs> - - <tests> - <test> - <param name="input_fasta" value="SNORD13-revised.fa" ftype="fasta" /> - <param name="use_custom_xml" value="false" ftype="fasta" /> - - <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" /> - </test> - <test> - <param name="input_fasta" value="SNORD48-revised.fa" ftype="fasta" /> - <param name="use_custom_xml" value="false" ftype="fasta" /> - 
- <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" /> - </test> - <test> - <param name="input_fasta" value="SNORD114-4-revised.fa" ftype="fasta" /> - <param name="use_custom_xml" value="false" ftype="fasta" /> - - <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" /> - </test> - <test> - <param name="input_fasta" value="SNORD118-revised.fa" ftype="fasta" /> - <param name="use_custom_xml" value="false" ftype="fasta" /> - - <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" /> - </test> - </tests> - - <help><![CDATA[ -This is an utility of the segmentation-fold package that allows to scan for the presence of certain segments. -If present, it will also scan for the Gibbs free energy necessairy the segment has to provide to contribute to the optimal structure. - ]]></help> - - <citations> - <citation type="bibtex"> - @mastersthesis{mastersthesis, - author = {Youri Hoogstrate}, - title = {An algorithm for predicting RNA 2D structures including K-turns}, - school = {University of Technology Delft, Leiden University}, - year = 2012, - address = {}, - month = 11, - note = {Research assignment for Master Computer-science}, - url = { https://yh-kt-fold.googlecode.com/files/Report.pdf } - } - </citation> - </citations> -</tool>
--- a/segmentation-fold.xml Thu Mar 31 04:26:12 2016 -0400 +++ b/segmentation-fold.xml Thu Jul 28 10:25:37 2016 -0400 @@ -1,13 +1,16 @@ -<tool id="segmentation_fold" name="segmentation-fold" version="1.6.3-1"> +<tool id="segmentation_fold" name="segmentation-fold" version="@VERSION@-1"> <description>RNA-Folding including predefined segments including K-turns</description> + <macros> + <import>macros.xml</import> + </macros> <requirements> - <requirement type="package" version="1.6.3">segmentation-fold</requirement> + <requirement type="package" version="1.6.5">segmentation-fold</requirement> </requirements> - <stdio></stdio> + <expand macro="stdio" /> - <version_command>segmentation-fold --version | head -n 2 | tail -n 1 | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'</version_command> + <version_command>@VERSION_COMMAND_SMF@</version_command> <command><![CDATA[ segmentation-fold @@ -234,18 +237,5 @@ Youri Hoogstrate (yhoogstrate @ github) ]]></help> - <citations> - <citation type="bibtex"> - @mastersthesis{mastersthesis, - author = {Youri Hoogstrate}, - title = {An algorithm for predicting RNA 2D structures including K-turns}, - school = {University of Technology Delft, Leiden University}, - year = 2012, - address = {}, - month = 11, - note = {Research assignment for Master Computer-science}, - url = { https://yh-kt-fold.googlecode.com/files/Report.pdf } - } - </citation> - </citations> + <expand macro="citations" /> </tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_01.in.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,8 @@ +>chr1:10-21 x unknown-01 +GGGGAAACCCC +((((...)))) ((.((.)).)) -2.5 +((.((.)).)) (((((.))))) -3.5 +>chr1:25-36 x unknown-01 +AAAAAAAAAAA +>chr1:45-56 x unknown-01 +AAAAAAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_02.in.bed Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,6 @@ +chr1 0 1 firstbase 0 + +chr1 0 5 1-2-3-4-5 0 + +chr1 5 10 6-7-8-9-10 0 + +chr1 10 11 hideme 0 + +chr2 0 5 hideme2 0 + +chr2 5 10 hideme3 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_02.in.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,6 @@ +>chr1:0-10 x unknown-01 +AAAAAAAAAA +>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1) +AAAAAAAAAAA +>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2) +AAAAAAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_02.out.n.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,4 @@ +>chr1:25-36 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 1) +AAAAAAAAAAA +>chr1:45-56 x unknown-01 (aligned reads tests/test-data/DBNFile.test_01.in.bam: 2) +AAAAAAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_02.out.o.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,2 @@ +>chr1:0-10 x unknown-01 (overlap in tests/test-data/DBNFile.test_02.in.bed: firstbase,1-2-3-4-5,6-7-8-9-10) +AAAAAAAAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_03.in.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,42 @@ +>chr3.rna:5-35(+) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +>chr3.rna:5-35(+) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +(((.(((........))).)))........ .((...(((((...........))))))). -13.125 +>chr3.rna:5-80(+) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna:5-80(+) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +>chr3.rna:50-80(+) x Kt-CD-box.CGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna:50-80(+) x Kt-CD-box.UGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125 +>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG +...((............(((((.((.((((.....)))).))))))).((((((...........)))))))) ...((..((((((............))...)))).(((......))).((((((...........)))))))) 0.0 +>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG +>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT +.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))...... ..(((..((((((............))...))))...........(((((((((...........)))))))))...)))....... 
0.0 +>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT +>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125 +>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +(((.(((........))).)))........ .((...(((((...........))))))). -13.125
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_03.out.l.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,14 @@ +>chr3.rna:5-80(+) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.CGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG +...((............(((((.((.((((.....)))).))))))).((((((...........)))))))) ...((..((((((............))...)))).(((......))).((((((...........)))))))) 0.0 +>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.CGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT +.........(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))...... ..(((..((((((............))...))))...........(((((((((...........)))))))))...)))....... 0.0 +>chr3.rna.RC:80-5(-) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0 +(((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).)))) (((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..)))))))) 0.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DBNFile.test_03.out.s.dbn Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,28 @@ +>chr3.rna:5-35(+) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +>chr3.rna:5-35(+) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +(((.(((........))).)))........ .((...(((((...........))))))). -13.125 +>chr3.rna:5-80(+) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna:50-80(+) x Kt-CD-box.CGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna:50-80(+) x Kt-CD-box.UGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125 +>chr4_SNORD118_revised:1-74(+) x Kt-CD-box.UGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTG +>chr4_SNORD118_revised:1-88(+) x Kt-CD-box.UGU +GAACATGATGATTGGAGATGCATGAAACGTGATTAACGTCTCTGCGTAATCAGGACTTGCAACACCCTGATTGCTCCTGTCTGATTT +>chr3.rna.RC:35-5(-) x Kt-CD-box.CGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna.RC:35-5(-) x Kt-CD-box.UGU +ACTTGTGATGAAACACTCATGGTCTGAAGA +..(..(((((...)).)))..)........ .((...(((((....(....).))))))). -13.125 +>chr3.rna.RC:80-5(-) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGTAGCCCTTTTTGGGCTACTTGTGATGAAACACTCATGGTCTGAAGA +>chr3.rna.RC:80-50(-) x Kt-CD-box.CGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +>chr3.rna.RC:80-50(-) x Kt-CD-box.UGU +TCTTGTGATGAGAAGTACTGGATCTGAAGT +(((.(((........))).)))........ .((...(((((...........))))))). -13.125
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ExtractBoxedSequences.test_01.in.bed Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,6 @@ +chr10 0 7 box1-f:NRTGATG 0 + +chr10 14 18 box2-f:CTGA 0 + +chr10 28 35 box1-f:NRTGATG 0 + +chr10 42 46 box2-f:CTGA 0 + +chr10 56 63 box1-f:NRTGATG 0 + +chr10 70 74 box2-f:CTGA 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ExtractBoxedSequences.test_01.in.fa Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,12 @@ +>chr10 +AATGATG +aaaaaaa +CTGAaaa +ccccccc +AATGATG +aaaaaaa +CTGAaaa +ccccccc +AATGATG +aaaaaaa +CTGAaaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ExtractBoxedSequences.test_01.out.fa Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,12 @@ +>chr10:0-18(+) +AATGATGaaaaaaaCTGA +>chr10:0-46(+) +AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA +>chr10:0-74(+) +AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA +>chr10:28-46(+) +AATGATGaaaaaaaCTGA +>chr10:28-74(+) +AATGATGaaaaaaaCTGAaaacccccccAATGATGaaaaaaaCTGA +>chr10:56-74(+) +AATGATGaaaaaaaCTGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FindBoxes.genome.fa Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,44 @@ +>chr1 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +>chr2 +aaaaaCTGAaaaaaaaCTGAaaaaa +>chr3 +CTGAaaaaaaaCTGA +>chr4 +CTGACTGA +>chr5 +TCAGaaaaaaTCAG +>chr6 +tcagAAAAAAtcag +>chr7 +AATGATG +CATGATG +TATGATG +GATGATG +AGTGATG +CGTGATG +TGTGATG +GGTGATG +>chr8_no_valid_C_boxes +ACTGATG +CCTGATG +TCTGATG +GCTGATG +ATTGATG +CTTGATG +TTTGATG +GTTGATG +>chr9 +CATCACCCATCACACATCACGCATCACTCATCATCCATCATACATCATGCATCATT +>chr10 +AATGATG +aaaaaaa +CTGAaaa +ccccccc +AATGATG +aaaaaaa +CTGAaaa +ccccccc +AATGATG +aaaaaaa +CTGAaaa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/FindBoxes.test_02.bed Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,36 @@ +chr2 5 9 box2-f:CTGA 0 + +chr2 16 20 box2-f:CTGA 0 + +chr3 0 4 box2-f:CTGA 0 + +chr3 11 15 box2-f:CTGA 0 + +chr4 0 4 box2-f:CTGA 0 + +chr4 4 8 box2-f:CTGA 0 + +chr5 0 4 box2-r:TCAG 0 - +chr5 10 14 box2-r:TCAG 0 - +chr6 0 4 box2-r:TCAG 0 - +chr6 10 14 box2-r:TCAG 0 - +chr7 0 7 box1-f:NRTGATG 0 + +chr7 7 14 box1-f:NRTGATG 0 + +chr7 14 21 box1-f:NRTGATG 0 + +chr7 21 28 box1-f:NRTGATG 0 + +chr7 28 35 box1-f:NRTGATG 0 + +chr7 35 42 box1-f:NRTGATG 0 + +chr7 42 49 box1-f:NRTGATG 0 + +chr7 49 56 box1-f:NRTGATG 0 + +chr8_no_valid_C_boxes 1 5 box2-f:CTGA 0 + +chr8_no_valid_C_boxes 8 12 box2-f:CTGA 0 + +chr8_no_valid_C_boxes 15 19 box2-f:CTGA 0 + +chr8_no_valid_C_boxes 22 26 box2-f:CTGA 0 + +chr9 0 7 box1-r:CATCAYN 0 - +chr9 7 14 box1-r:CATCAYN 0 - +chr9 14 21 box1-r:CATCAYN 0 - +chr9 21 28 box1-r:CATCAYN 0 - +chr9 28 35 box1-r:CATCAYN 0 - +chr9 35 42 box1-r:CATCAYN 0 - +chr9 42 49 box1-r:CATCAYN 0 - +chr9 49 56 box1-r:CATCAYN 0 - +chr10 0 7 box1-f:NRTGATG 0 + +chr10 14 18 box2-f:CTGA 0 + +chr10 28 35 box1-f:NRTGATG 0 + +chr10 42 46 box2-f:CTGA 0 + +chr10 56 63 box1-f:NRTGATG 0 + +chr10 70 74 box2-f:CTGA 0 +
--- a/test-data/SNORD114-4-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/SNORD114-4-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,74 +1,57 @@ ->SNORD114-4 x Kt-42.dra -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-42.eco +>SNORD114-4 revised x Kt-7 G2nA SAM riboswitch (H. marismortui) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Loop-E-Motif.bac +>SNORD114-4 revised x Kt-7 (T. thermophilus) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.UAU +>SNORD114-4 revised x Kt-7 (E. coli) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.UGU +>SNORD114-4 revised x Kt-7 (D. radiodurans) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.CAU +>SNORD114-4 revised x Kt-11 (T. thermophilus) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-42.tth +>SNORD114-4 revised x Kt-11.eco CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-G2nA-SAM-riboswitch (T. tengcongensi) -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-7 G2nA SAM riboswitch (H. 
marismortui) +>SNORD114-4 revised x Kt-15.hma CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-15.hma +.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((....(((.((.....((((......))))((....))...)...))))...(((....))).)))))) -6.42471313477 +>SNORD114-4 revised x Kt-23.tth CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA -.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((....(((.((.....((((......))))((....))...)...))))...(((....))).)))))) -6.4197063446 ->SNORD114-4 x Kt-U4b.hsa +>SNORD114-4 revised x Kt-23.eco CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.GGU -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-7 (E. coli) +>SNORD114-4 revised x Kt-38.hma CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-7 (D. radiodurans) +>SNORD114-4 revised x Kt-42.hma CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-58.hma +>SNORD114-4 revised x Kt-42.tth CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-46.tth +>SNORD114-4 revised x Kt-42.dra CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-SAM-ribo.tte -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-23.eco +>SNORD114-4 revised x Kt-42.eco CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-38.hma -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-46.dra +>SNORD114-4 revised x Kt-46.hma CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.CGU +>SNORD114-4 revised x Kt-46.tth 
CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-T-box.bsu +>SNORD114-4 revised x Kt-46.dra CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-23.tth +>SNORD114-4 revised x Kt-46.eco CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-46.eco +>SNORD114-4 revised x Kt-58.hma CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-c-di-GMP-II.cac +>SNORD114-4 revised x Kt-U4a.hsa CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-11 (T. thermophilus) -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.AAU +>SNORD114-4 revised x Kt-U4b.hsa CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-L30e.sce -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-7 (T. 
thermophilus) +>SNORD114-4 revised x Kt-CD-box.CGU CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-42.hma +>SNORD114-4 revised x Kt-CD-box.UGU CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-U4a.hsa -CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-G2nA-SAMribo.bsu +>SNORD114-4 revised x Kt-L30e.sce CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.GAU +>SNORD114-4 revised x Kt-SAM-ribo.tte CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-11.eco +>SNORD114-4 revised x Kt-T-box.bsu CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-46.hma +>SNORD114-4 revised x Kt-c-di-GMP-II.cac CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA ->SNORD114-4 x Kt-CD-box.AGU +>SNORD114-4 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi) CUGAGCCAGUGAUGAAAACUGGUGGCAUAGAAGUCAAGGAUGCUGAAUAAUGUGUGUCUAGAACUCUGAGGUUCA -.(((((((((.......)))))).((.((((..((..((((((..........)))))).))..))))..))))) .((((((...(((((.......((((......)))).((((((..........)))))).....))))))))))) 1.99971199036
--- a/test-data/SNORD118-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/SNORD118-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,75 +1,57 @@ ->SNORD118 x Kt-42.dra -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-42.eco +>SNORD118 revised x Kt-7 G2nA SAM riboswitch (H. marismortui) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Loop-E-Motif.bac +>SNORD118 revised x Kt-7 (T. thermophilus) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.UAU +>SNORD118 revised x Kt-7 (E. coli) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.UGU +>SNORD118 revised x Kt-7 (D. radiodurans) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.CAU +>SNORD118 revised x Kt-11 (T. thermophilus) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC -.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) .(((...((((((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).))))))))). 3.380651474 ->SNORD118 x Kt-42.tth +>SNORD118 revised x Kt-11.eco AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-G2nA-SAM-riboswitch (T. tengcongensi) +>SNORD118 revised x Kt-15.hma AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-7 G2nA SAM riboswitch (H. 
marismortui) -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-15.hma +>SNORD118 revised x Kt-23.tth AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-U4b.hsa +>SNORD118 revised x Kt-23.eco AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.GGU -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-7 (E. coli) +>SNORD118 revised x Kt-38.hma AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-7 (D. radiodurans) +>SNORD118 revised x Kt-42.hma AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-58.hma +>SNORD118 revised x Kt-42.tth AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-46.tth +>SNORD118 revised x Kt-42.dra AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-SAM-ribo.tte -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-23.eco +>SNORD118 revised x Kt-42.eco AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-38.hma -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-46.dra +>SNORD118 revised x Kt-46.hma AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.CGU +>SNORD118 revised x Kt-46.tth AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC -.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((((...((((((............))...))))...))......(((((((((...........))))))))).....)))...... 
-15.021024704 ->SNORD118 x Kt-T-box.bsu +>SNORD118 revised x Kt-46.dra AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-23.tth +>SNORD118 revised x Kt-46.eco AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-46.eco +>SNORD118 revised x Kt-58.hma AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-c-di-GMP-II.cac +>SNORD118 revised x Kt-U4a.hsa AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-11 (T. thermophilus) -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.AAU +>SNORD118 revised x Kt-U4b.hsa AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-L30e.sce -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-7 (T. thermophilus) +>SNORD118 revised x Kt-CD-box.CGU AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-42.hma +.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((((...((((((............))...))))...))......(((((((((...........))))))))).....)))...... 
-15.038848877 +>SNORD118 revised x Kt-CD-box.UGU AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-U4a.hsa -AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-G2nA-SAMribo.bsu +>SNORD118 revised x Kt-L30e.sce AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.GAU +>SNORD118 revised x Kt-SAM-ribo.tte AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC -.(((.((.(.(((.(((((((((.((.((((.....)))).)))))))(((((((...........))))))).)))).)))).))))) (((.(((...(((((......))))))((((.....))))...)).(((((((((...........))))))))).....)))...... -8.30012321472 ->SNORD118 x Kt-11.eco +>SNORD118 revised x Kt-T-box.bsu AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-46.hma +>SNORD118 revised x Kt-c-di-GMP-II.cac AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC ->SNORD118 x Kt-CD-box.AGU +>SNORD118 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi) AGAACAUGAUGAUUGGAGAUGCAUGAAACGUGAUUAACGUCUCUGCGUAAUCAGGACUUGCAACACCCUGAUUGCUCCUGUCUGAUUUC
--- a/test-data/SNORD13-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/SNORD13-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,74 +1,57 @@ ->SNORD13 x Kt-42.dra -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-42.eco +>SNORD13 revised x Kt-7 G2nA SAM riboswitch (H. marismortui) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Loop-E-Motif.bac +>SNORD13 revised x Kt-7 (T. thermophilus) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.UAU +>SNORD13 revised x Kt-7 (E. coli) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.UGU +>SNORD13 revised x Kt-7 (D. radiodurans) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.CAU +>SNORD13 revised x Kt-11 (T. thermophilus) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-42.tth +>SNORD13 revised x Kt-11.eco GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-G2nA-SAM-riboswitch (T. tengcongensi) -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-7 G2nA SAM riboswitch (H. 
marismortui) +>SNORD13 revised x Kt-15.hma GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-15.hma -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-U4b.hsa +>SNORD13 revised x Kt-23.tth GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.GGU +>SNORD13 revised x Kt-23.eco GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-7 (E. coli) +>SNORD13 revised x Kt-38.hma GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-7 (D. radiodurans) +>SNORD13 revised x Kt-42.hma GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-58.hma +>SNORD13 revised x Kt-42.tth GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-46.tth +>SNORD13 revised x Kt-42.dra GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-SAM-ribo.tte -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-23.eco +>SNORD13 revised x Kt-42.eco GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-38.hma -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-46.dra +>SNORD13 revised x Kt-46.hma GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.CGU +>SNORD13 revised x Kt-46.tth GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC -(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) 
(((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..))))))))))))))))))))))))) 3.35026359558 ->SNORD13 x Kt-T-box.bsu +>SNORD13 revised x Kt-46.dra GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-23.tth +>SNORD13 revised x Kt-46.eco GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-46.eco +>SNORD13 revised x Kt-58.hma GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-c-di-GMP-II.cac +>SNORD13 revised x Kt-U4a.hsa GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-11 (T. thermophilus) -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.AAU +>SNORD13 revised x Kt-U4b.hsa GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-L30e.sce -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-7 (T. 
thermophilus) +>SNORD13 revised x Kt-CD-box.CGU GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-42.hma +(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) (((((.(.(...((((((.((((((((((((....)))))))(((((((..(.((((((...))).))).)..))))))))))))))))))))))))) 3.37054443359 +>SNORD13 revised x Kt-CD-box.UGU GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-U4a.hsa -GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-G2nA-SAMribo.bsu +>SNORD13 revised x Kt-L30e.sce GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.GAU +>SNORD13 revised x Kt-SAM-ribo.tte GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC -(((((.(..((.(((....((((((((((((....)))))))(((((((..(.((((((...))).))).)..)))))))))))))))))..)))))) (((((((.((((...((((((((((....))))))..)))))...)))))............((((.((((.((........)))))))))).))))) -9.91994667053 ->SNORD13 x Kt-11.eco +>SNORD13 revised x Kt-T-box.bsu GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-46.hma +>SNORD13 revised x Kt-c-di-GMP-II.cac GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC ->SNORD13 x Kt-CD-box.AGU +>SNORD13 revised x Kt-G2nA-SAM-riboswitch (T. tengcongensi) GUUCAUGAGCGUGAUGAUUGGGUGUUCAUACGCUUGUGUGAGAUGUGCCACCCUUGAACCUUGUUACGACGUGGGCACAUUACCCGUCUGACCUGAAC
--- a/test-data/SNORD48-revised.scan-for-segments.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/SNORD48-revised.scan-for-segments.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,74 +1,56 @@ ->SNORD48 x Kt-42.dra -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-42.eco +>SNORD48 revised (2 extra bases on 5' end) x Kt-7 G2nA SAM riboswitch (H. marismortui) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Loop-E-Motif.bac +>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (T. thermophilus) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.UAU +>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (E. coli) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.UGU +>SNORD48 revised (2 extra bases on 5' end) x Kt-7 (D. radiodurans) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.CAU +>SNORD48 revised (2 extra bases on 5' end) x Kt-11 (T. thermophilus) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-42.tth +>SNORD48 revised (2 extra bases on 5' end) x Kt-11.eco GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-G2nA-SAM-riboswitch (T. tengcongensi) -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-7 G2nA SAM riboswitch (H. marismortui) +>SNORD48 revised (2 extra bases on 5' end) x Kt-15.hma GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-15.hma -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-U4b.hsa +>SNORD48 revised (2 extra bases on 5' end) x Kt-23.tth GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.GGU +>SNORD48 revised (2 extra bases on 5' end) x Kt-23.eco GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-7 (E. 
coli) +>SNORD48 revised (2 extra bases on 5' end) x Kt-38.hma GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-7 (D. radiodurans) +>SNORD48 revised (2 extra bases on 5' end) x Kt-42.hma GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-58.hma +>SNORD48 revised (2 extra bases on 5' end) x Kt-42.tth GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-46.tth +>SNORD48 revised (2 extra bases on 5' end) x Kt-42.dra GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-SAM-ribo.tte -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-23.eco +>SNORD48 revised (2 extra bases on 5' end) x Kt-42.eco GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-38.hma -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-46.dra +>SNORD48 revised (2 extra bases on 5' end) x Kt-46.hma GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.CGU +>SNORD48 revised (2 extra bases on 5' end) x Kt-46.tth GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-T-box.bsu +>SNORD48 revised (2 extra bases on 5' end) x Kt-46.dra GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-23.tth +>SNORD48 revised (2 extra bases on 5' end) x Kt-46.eco GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-46.eco +>SNORD48 revised (2 extra bases on 5' end) x Kt-58.hma GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-c-di-GMP-II.cac +>SNORD48 revised (2 extra bases on 5' end) x Kt-U4a.hsa GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-11 (T. 
thermophilus) -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.AAU +>SNORD48 revised (2 extra bases on 5' end) x Kt-U4b.hsa GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-L30e.sce -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-7 (T. thermophilus) +>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.CGU GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-42.hma +>SNORD48 revised (2 extra bases on 5' end) x Kt-CD-box.UGU GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-U4a.hsa -GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-G2nA-SAMribo.bsu +>SNORD48 revised (2 extra bases on 5' end) x Kt-L30e.sce GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.GAU +>SNORD48 revised (2 extra bases on 5' end) x Kt-SAM-ribo.tte GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC -((((((.((.((((((((((.....)))).).).)))).((((...))))...)).)))))).... (((((...(((((((...))).....))))).....(((((............))))))))).... -10.9206504822 ->SNORD48 x Kt-11.eco +>SNORD48 revised (2 extra bases on 5' end) x Kt-T-box.bsu GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-46.hma +>SNORD48 revised (2 extra bases on 5' end) x Kt-c-di-GMP-II.cac GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC ->SNORD48 x Kt-CD-box.AGU +>SNORD48 revised (2 extra bases on 5' end) x Kt-G2nA-SAM-riboswitch (T. tengcongensi) GGAGUGAUGAUGACCCCAGGUAACUCUUGAGUGUGUCGCUGAUGCCAUCACCGCAGCGCUCUGACC -((((((.((.((((((((((.....)))).).).)))).((((...))))...)).)))))).... ((...(((((..((..((((.....)))).))((((.((((((...))))..)).))))))))))) -1.84962844849
--- a/test-data/segments_truncated.2.out.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/segments_truncated.2.out.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,5 +1,7 @@ ->C/D-box snoRNA (shuffle iteration 1) -agaggCGUGAUcccaacgUGAuggc -....((((.......))))...... ....(...((((.....)))))... -9.3900680542 ->Artificial double C/D K-turn construct (shuffle iteration 1) -uguucugucacggcacauaccuccggUGUGAUggUGAauaguaUGAgaaguaucgugugucagaggcccuaaUGUGAUgccuuaa +>C/D-box snoRNA x Kt-CD-box.CGU +GCUCUGACCGAAAGGCGUGAUGAGC +((((....((.....))....)))) (((((((((....))...))))))) 2.49938964844 +>Artificial double C/D K-turn construct x Kt-CD-box.UGU +GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC +((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656 +((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
--- a/test-data/segments_truncated.out.txt Thu Mar 31 04:26:12 2016 -0400 +++ b/test-data/segments_truncated.out.txt Thu Jul 28 10:25:37 2016 -0400 @@ -1,6 +1,7 @@ ->C/D-box snoRNA +>C/D-box snoRNA x Kt-CD-box.CGU GCUCUGACCGAAAGGCGUGAUGAGC -((((....((.....))....)))) (((((((((....))...))))))) 2.50072479248 ->Artificial double C/D K-turn construct +((((....((.....))....)))) (((((((((....))...))))))) 2.49938964844 +>Artificial double C/D K-turn construct x Kt-CD-box.UGU GGGAGUCUUGUGAUGAGAAGUACUGGAUCUGAAGUAGCCCUUUUUGGGCUACUUGUGAUGAAACACUCAUGGUCUGAAGACUCCC -((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.25102996826 +((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656 +((((((((((((........))).((((((((((((((((.....))))))))(((......))).))).))))).))))))))) ((((((((...(((((..(....((..(((((((((((((.....))))))))...)))))..))....)..))))))))))))) -1.26037597656
--- a/tool_dependencies.xml Thu Mar 31 04:26:12 2016 -0400 +++ b/tool_dependencies.xml Thu Jul 28 10:25:37 2016 -0400 @@ -1,10 +1,24 @@ <?xml version="1.0"?> <tool_dependency> - <package name="segmentation-fold" version="1.6.3"> - <repository changeset_revision="f4981e860e2c" name="package_segmentation_fold_1_6_3" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="segmentation-fold" version="1.6.5"> + <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> + <package name="python" version="2.7.10"> <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> + <package name="numpy" version="1.9"> + <repository changeset_revision="f24fc0b630fc" name="package_python_2_7_numpy_1_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="pysam" version="0.8.2.1"> + <repository changeset_revision="f8fecf1f6eba" name="package_pysam_0_8_2" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="htseq" version="0.6.1"> + <repository changeset_revision="fbb72996807d" name="package_htseq_0_6" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + + <package name="segmentation-fold-utils" version="2.0.1"> + <repository changeset_revision="f448376f428f" name="package_segmentation_fold_1_6_5__utils_2_0_1" owner="yhoogstrate" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> </tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils_add-read-counts.xml Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,81 @@ +<tool id="smf_utils_add-read-counts" name="add-read-counts" version="@VERSION@-3"> + <description>Annotate sequences by adding the read counts from a bam file, within a region contained in the fasta header of the dbn file</description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="1.9">numpy</requirement> + <requirement type="package" version="0.8.2.1">pysam</requirement> + <requirement type="package" version="0.6.1">htseq</requirement> + <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement> + </requirements> + <expand macro="stdio" /> + + <version_command>@VERSION_COMMAND_UTILS@</version_command> + + <command><![CDATA[ + ln -f -s '${bam_input_file.metadata.bam_index}' '${bam_input_file}.bai' && + + segmentation-fold-utils + add-read-counts + --regex '${regex.replace("'","\\'")}' + '$dbn_input_file' + '$bam_input_file' + '$dbn_output_file' + ]]></command> + + <inputs> + <param name="dbn_input_file" + type="data" + format="dbn,txt,text" + label="Input DBN file" + help="The 'fasta'-headers should contain the genomic position being used to find overlapping reads in the BAM file"/> + <param name="bam_input_file" + type="data" + format="bam" + label="Input BAM file"/> + <param name="regex" + type="text" + argument="--regex" + value='>.*?(chr[^:]):([0-9]+)-([0-9]+)' + label="Regex to capture the targeted location in DBN file" + help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]):([0-9]+)-([0-9]+)'" /> + </inputs> + + <outputs> + <data name="dbn_output_file" + format="dbn" + label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name}"/> + </outputs> + + <tests> + <test> + <param name="dbn_input_file" 
value="DBNFile.test_01.in.dbn" ftype="dbn"/> + <param name="bam_input_file" value="DBNFile.test_01.in.bam" ftype="bam"/> + <param name="regex" value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'/> + + <output name="dbn_output_file"> + <assert_contents> + <has_line_matching expression=">chr1:10-21 x unknown-01 \(aligned reads .*?: 20\)"/> + <has_line line="GGGGAAACCCC"/> + <has_line line="((((...))))	((.((.)).))	-2.5"/> + <has_line line="((.((.)).))	(((((.)))))	-3.5"/> + + <has_line_matching expression=">chr1:25-36 x unknown-01 \(aligned reads.*?: 1\)"/> + <has_line line="AAAAAAAAAAA"/> + + <has_line_matching expression=">chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/> + </assert_contents> + </output> + </test> + </tests> + + <help><![CDATA[ +This is an utility of the segmentation-fold package + ]]></help> + + <expand macro="citations" /> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils_estimate-energy.xml Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,144 @@ +<tool id="smf_utils_estimate-energy" name="estimate-energy" version="@VERSION@-2"> + <description>Estimates whether a certain Segment(Loop) is present and for which delta-G this transition takes place</description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="1.9">numpy</requirement> + <requirement type="package" version="0.8.2.1">pysam</requirement> + <requirement type="package" version="0.6.1">htseq</requirement> + <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement> + + <requirement type="package" version="1.6.5">segmentation-fold</requirement> + </requirements> + <expand macro="stdio" /> + + <version_command>echo $(@VERSION_COMMAND_SMF@)", "$(@VERSION_COMMAND_UTILS@)</version_command> + + <command><![CDATA[ + segmentation-fold-utils + estimate-energy + -T \${GALAXY_SLOTS:-2} + -x + #if str($parameters.use_custom_xml) == "true" + "${parameters.input_xml}" + #else + "\$SEGMENTATION_FOLD_DEFAULT_XML" + #end if + + -p $precision + -r $randomize + + #if $sequences_from_fasta_file: + --sequences-from-fasta-file "${sequences_from_fasta_file}" + #end if + + $output_list + ]]></command> + + <inputs> + <conditional name="parameters"> + <param name="use_custom_xml" + type="boolean" + truevalue="true" + falsevalue="false" + selected="false" + label="Use segment definition from history" /> + + <when value="false" /> + <when value="true"> + <param name="input_xml" + type="data" + format="xml" + multiple="false" + argument="-x" + label="Custom 'segments.xml'-syntaxed file" /> + </when> + </conditional> + + <param name="precision" + type="float" + value="0.05" + min="0" + argument="--precision" + label="Precision" + help="Minimal difference for binary split - the smaller this value the 
slower. if this value equals 0, the difference is set to infinity (default: 0.05)" /> + + <param name="randomize" + type="integer" + value="0" + min="0" + argument="--randomize" + label="Shuffle each sequence this many times and predict energy of shuffled sequence(s) (default: 0, 0 means disabled)" /> + + <param name="sequences_from_fasta_file" + type="data" + format="fasta" + multiple="false" + optional="true" + argument="--sequences-from-fasta-file" + label="Optional sequences to scan for Segment(Loop)s (FASTA)" + help="Use sequences from a FASTA file instead of the XML file that contains the segments. In XML files you can explicitly link one Segment(Loop) to one particular sequence instead of doing n*n comparisons (default: None)" /> + </inputs> + + <outputs> + <data format="dbn" name="output_list" label="${tool.name}" /> + </outputs> + + <tests> + <!-- xml * fasta mode --> + <test> + <param name="use_custom_xml" value="false" ftype="fasta" /> + <param name="sequences_from_fasta_file" value="SNORD13-revised.fa" ftype="fasta" /> + + <output name="output_list" file="SNORD13-revised.scan-for-segments.txt" /> + </test> + <test> + <param name="use_custom_xml" value="false" ftype="fasta" /> + <param name="sequences_from_fasta_file" value="SNORD48-revised.fa" ftype="fasta" /> + + <output name="output_list" file="SNORD48-revised.scan-for-segments.txt" /> + </test> + <test> + <param name="use_custom_xml" value="false" ftype="fasta" /> + <param name="sequences_from_fasta_file" value="SNORD114-4-revised.fa" ftype="fasta" /> + + <output name="output_list" file="SNORD114-4-revised.scan-for-segments.txt" /> + </test> + <test> + <param name="use_custom_xml" value="false" ftype="fasta" /> + <param name="sequences_from_fasta_file" value="SNORD118-revised.fa" ftype="fasta" /> + + <output name="output_list" file="SNORD118-revised.scan-for-segments.txt" /> + </test> + + + <!-- xml * xml mode --> + <test> + <param name="use_custom_xml" value="true" /> + <param name="input_xml" 
value="segments_truncated.xml" ftype="xml" /> + <param name="do_randomization" value="false" /> + <param name="shuffle_n_times" value="0" /> + + <output name="output_list" file="segments_truncated.out.txt" lines_diff="2" /> + </test> + <test> + <param name="use_custom_xml" value="true" /> + <param name="input_xml" value="segments_truncated.xml" ftype="xml" /> + <param name="do_randomization" value="false" /> + <param name="shuffle_n_times" value="1" /> + + <output name="output_list" file="segments_truncated.2.out.txt" lines_diff="10" /> + </test> + </tests> + + <help><![CDATA[ +This is a utility of the segmentation-fold package that allows scanning for the presence of certain segments. +If present, it will also scan for the Gibbs free energy that the segment needs to provide in order to contribute to the optimal structure. + ]]></help> + + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils_extract-boxed-sequences.xml Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,114 @@ +<tool id="smf_utils_extract-boxed-sequences" name="extract-boxed-sequences" version="@VERSION@-1"> + <description>Extracts boxed sequences from bed_input_file which has to be created with 'find-box', part of this utility</description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="1.9">numpy</requirement> + <requirement type="package" version="0.8.2.1">pysam</requirement> + <requirement type="package" version="0.6.1">htseq</requirement> + <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement> + </requirements> + <expand macro="stdio" /> + + <version_command>@VERSION_COMMAND_UTILS@</version_command> + + <command><![CDATA[ + segmentation-fold-utils + extract-boxed-sequences + + --max-inner-dist $max_inner_dist + --bp-extension $bp_extension + + '${fasta_input_file}' + '${bed_input_file}' + '${fasta_output_file}' + + ]]></command> + + <inputs> + <param name="fasta_input_file" + type="data" + format="fasta" + label="Genomic reference FASTA file"/> + <param name="bed_input_file" + type="data" + format="bed" + label="BED file containing the sequence boxes" + help="This file should have been created with 'find-boxes'"/> + + <param name="max_inner_dist" + type="integer" + min="0" + value="250" + label="Maximal distance between the boxes" + help="(default=250bp)"/> + <param name="bp_extension" + type="integer" + min="0" + value="10" + label="Extend extracted sequences with this number of bases" + help="(default: 10bp)"/> + </inputs> + + <outputs> + <data name="fasta_output_file" + format="fasta" + label="${tool.name} on ${fasta_input_file.hid}: ${fasta_input_file.name}"/> + </outputs> + + <tests> + <test> + <param name="fasta_input_file" value="ExtractBoxedSequences.test_01.in.fa" 
ftype="fasta"/> + <param name="bed_input_file" value="ExtractBoxedSequences.test_01.in.bed" ftype="bed"/> + <param name="max_inner_dist" value='100'/> + <param name="bp_extension" value='0'/> + + <output name="fasta_output_file" file="ExtractBoxedSequences.test_01.out.fa"/> + </test> + </tests> + + <help><![CDATA[ +extract-boxed-sequences +----------------------- +*Extracts boxed sequences from `bed_input_file` which has to be created with 'find-boxes', also part of this utility* + +The user can use this utility to extract sequences containing the boxes provided in the bed file by `find-boxes`. + +**input** + +Important information about the input: + + - `FASTA_INPUT_FILE` can be any generic FASTA file that can be read with pysam. This means that if the sequence is split into multiple lines, they must all be at the same length. + - `BED_INPUT_FILE` the bed file should be provided by `find-boxes` as it properly denotes the names (box1-f, box1-r, box2-f and box2-r) which are used for extraction. + - `-d, --max-inner-dist INTEGER` Only sequences for which the distance in bases between the boxes is smaller than this distance, will be extracted. Boxes are excluded from this distance. + - `-e, --bp-extension INTEGER` Each sequence will be extended with: + * The boxes + * An optional number of bases provided with this argument + +**output** + +Be aware that there can be overlapping sequences. For example, if you started box1=`TTTT` and box2=`CCCC` with the following sequence, you will extract 2 sequences: + +```>seq +gagagaTTTTgagagaTTTTgagagagagagagagaCCCCgaga +``` + +Namely: + +```TTTTgagagaTTTTgagagagagagagagaCCCC +``` + +and + +``` TTTTgagagagagagagagaCCCC +``` + +This is a utility of the segmentation-fold package + ]]></help> + + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils_filter-annotated-entries.xml Thu Jul 28 10:25:37 2016 -0400 @@ -0,0 +1,84 @@ +<tool id="smf_utils_filter-annotated-entries" name="filter-annotated-entries" version="@VERSION@-1"> + <description>Split entries into two files based on whether they overlap annotations in a bed file</description> + + <macros> + <import>macros.xml</import> + </macros> + + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="1.9">numpy</requirement> + <requirement type="package" version="0.8.2.1">pysam</requirement> + <requirement type="package" version="0.6.1">htseq</requirement> + <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement> + </requirements> + <expand macro="stdio" /> + + <version_command>@VERSION_COMMAND_UTILS@</version_command> + + <command><![CDATA[ + segmentation-fold-utils + filter-annotated-entries + --regex '${regex.replace("'","\\'")}' + '$dbn_input_file' + '$bed_input_file' + '$dbn_output_file_overlapping' + '$dbn_output_file_non_overlapping' + ]]></command> + + <inputs> + <param name="dbn_input_file" + type="data" + format="dbn,txt" + label="Input DBN file" + help="The 'fasta'-headers should contain the genomic position being used to find overlapping annotations in the BED file"/> + <param name="bed_input_file" + type="data" + format="bed" + label="The results will be filtered based on overlap with annotations in this BED file"/> + <param name="regex" + type="text" + argument="--regex" + value='>.*?(chr[^:]):([0-9]+)-([0-9]+)' + label="Regex to capture the targeted location in DBN file" + help="Do not change this value unless you're using customized software in the pipeline - default: '>.*?(chr[^:]):([0-9]+)-([0-9]+)'" /> + </inputs> + + <outputs> + <data name="dbn_output_file_overlapping" + format="dbn" + label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - overlapping entries"/> + <data 
name="dbn_output_file_non_overlapping" + format="dbn" + label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - non overlapping entries"/> + </outputs> + + <tests> + <test> + <param name="dbn_input_file" value="DBNFile.test_02.in.dbn" ftype="dbn"/> + <param name="bed_input_file" value="DBNFile.test_02.in.bed" ftype="bed"/> + <param name="regex" value='>.*?(chr[^:]):([0-9]+)-([0-9]+)'/> + + <output name="dbn_output_file_overlapping"> + <assert_contents> + <has_line_matching expression=">chr1:0-10 x unknown-01 \(overlap in .*?: firstbase,1-2-3-4-5,6-7-8-9-10\)"/> + <has_line line="AAAAAAAAAA"/> + </assert_contents> + </output> + <output name="dbn_output_file_non_overlapping"> + <assert_contents> + <has_line_matching expression=">chr1:25-36 x unknown-01 \(aligned reads .*?: 1\)"/> + <has_line line="AAAAAAAAAAA"/> + + <has_line_matching expression=">chr1:45-56 x unknown-01 \(aligned reads .*?: 2\)"/> + </assert_contents> + </output> + </test> + </tests> + + <help><![CDATA[ +Filter based on whether the entries in the DBN file are already annotated or not + ]]></help> + + <expand macro="citations" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_filter-by-energy.xml Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,66 @@
+<tool id="smf_utils_filter-by-energy" name="filter-by-energy" version="@VERSION@-1">
+    <description>Split entries over two files based on the estimated energy</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+    <expand macro="stdio" />
+
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+
+    <command><![CDATA[
+        segmentation-fold-utils
+            filter-by-energy
+                --energy '${energy}'
+                '$dbn_input_file'
+                '$dbn_output_file_larger_or_equal'
+                '$dbn_output_file_smaller'
+    ]]></command>
+    <!-- NOTE(review): the 'energy' help maps "smaller than e" to DBN_OUTPUT_FILE_LARGER_OR_EQUAL and "larger or equal" to DBN_OUTPUT_FILE_SMALLER, which looks swapped; confirm against segmentation-fold-utils before rewording. -->
+    <inputs>
+        <param name="dbn_input_file"
+               type="data"
+               format="dbn,txt"
+               label="Input DBN file"
+               help="DBN file of which the entries will be split based on the energy of their transitions"/>
+        <param name="energy"
+               type="float"
+               value="0.0"
+               argument="--energy"
+               label="Energy value"
+               help="Entries with transitions with energy smaller than energy (&lt; e) or without transitions will be put into DBN_OUTPUT_FILE_LARGER_OR_EQUAL and those larger or equal (&gt;= e) to DBN_OUTPUT_FILE_SMALLER" />
+    </inputs>
+
+    <outputs>
+        <data name="dbn_output_file_larger_or_equal"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy larger than or equal to selected"/>
+        <data name="dbn_output_file_smaller"
+              format="dbn"
+              label="${tool.name} on ${dbn_input_file.hid}: ${dbn_input_file.name} - energy smaller than selected"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="dbn_input_file" value="DBNFile.test_03.in.dbn" ftype="dbn"/>
+            <param name="energy" value="0.0"/>
+
+            <output name="dbn_output_file_larger_or_equal" file="DBNFile.test_03.out.l.dbn" />
+            <output name="dbn_output_file_smaller" file="DBNFile.test_03.out.s.dbn" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Splits the entries of a DBN file over two output files based on the estimated energy of their transitions relative to the selected energy value
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils_find-boxes.xml Thu Jul 28 10:25:37 2016 -0400
@@ -0,0 +1,90 @@
+<tool id="smf_utils_find-boxes" name="find-boxes" version="@VERSION@-1">
+    <description>Finds all occurrences of two given boxes (sequence motifs) within a FASTA file</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <requirements>
+        <requirement type="package" version="2.7.10">python</requirement>
+        <requirement type="package" version="1.9">numpy</requirement>
+        <requirement type="package" version="0.8.2.1">pysam</requirement>
+        <requirement type="package" version="0.6.1">htseq</requirement>
+        <requirement type="package" version="2.0.1">segmentation-fold-utils</requirement>
+    </requirements>
+
+    <expand macro="stdio" />
+
+    <version_command>@VERSION_COMMAND_UTILS@</version_command>
+
+    <command><![CDATA[
+        segmentation-fold-utils
+            find-boxes
+                --box1 '${box1}'
+                --box2 '${box2}'
+                $forward
+                $reverse
+                '${fasta_input}'
+                '${bed_output}'
+    ]]></command>
+
+    <inputs>
+        <param name="fasta_input"
+               type="data"
+               format="fasta"
+               argument="-f"
+               label="Fasta file with RNA-sequence" />
+
+        <param name="box1"
+               type="text"
+               value="NRUGAUG"
+               argument="--box1"
+               label="Sequence of box1 (default = C-box: 'NRUGAUG')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="box2"
+               type="text"
+               value="CUGA"
+               argument="--box2"
+               label="Sequence of box2 (default = D-box: 'CUGA')"
+               help="Sequence encoding can be found at the following url: https://en.wikipedia.org/wiki/FASTA_format#Sequence_representations" />
+
+        <param name="forward"
+               type="boolean"
+               truevalue="--forward"
+               falsevalue="--no-forward"
+               checked="true"
+               label="Search in the forward direction of the reference sequence" />
+
+        <param name="reverse"
+               type="boolean"
+               truevalue="--reverse"
+               falsevalue="--no-reverse"
+               checked="true"
+               label="Search in the reverse complement of the reference sequence" />
+    </inputs>
+
+    <outputs>
+        <data name="bed_output"
+              format="bed"
+              label="${tool.name} on ${fasta_input.hid}: ${fasta_input.name}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="fasta_input" value="FindBoxes.genome.fa" ftype="fasta" />
+            <param name="box1" value="NRUGAUG" />
+            <param name="box2" value="CUGA" />
+            <param name="forward" value="true" />
+            <param name="reverse" value="true" />
+
+            <output name="bed_output" file="FindBoxes.test_02.bed" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Scans a FASTA reference for BOX motifs (like C- and D-box) and reports them in a BED file
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>