changeset 5:58a870ef434c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/salsa2 commit 270d2e701478bcf3f7cd1db8e188eb2cd69f5db5
author iuc
date Thu, 01 Aug 2024 11:41:30 +0000 (4 months ago)
parents 9a22227bb6d0
children
files salsa2.xml test-data/arima1.agp test-data/arima1.fasta test-data/omnic.agp test-data/omnic.fasta test-data/out.agp test-data/test.fai
diffstat 7 files changed, 245 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/salsa2.xml	Thu May 19 14:17:24 2022 +0000
+++ b/salsa2.xml	Thu Aug 01 11:41:30 2024 +0000
@@ -2,7 +2,7 @@
     <description>scaffold long read assemblies with Hi-C</description>
     <macros>
         <token name="@TOOL_VERSION@">2.3</token>
-        <token name="@VERSION_SUFFIX@">3</token>
+        <token name="@VERSION_SUFFIX@">5</token>
     </macros>
     <xrefs>
         <xref type="bio.tools">SALSA</xref>
@@ -23,8 +23,10 @@
         -e 'GATC'
     #else if $enzyme_conditional.preconfigured_enzymes == 'arima1'
         -e 'GATC,GANTC'
-    #else
+    #else if $enzyme_conditional.preconfigured_enzymes == 'arima2'
         -e 'GATC,GANTC,CTNAG,TTAA'
+    #else if $enzyme_conditional.preconfigured_enzymes == 'omnic'
+        -e 'DNASE'
     #end if
 #else:
     -e '${enzyme_conditional.manual_enzyme}'
@@ -44,20 +46,26 @@
 #end if
 -m '$clean'
 -o ./out
+
+&& 
+
+## SALSA can write malformed AGP rows (wrong number of columns, e.g. a
+## trailing tab). Normalise every row to exactly 9 tab-delimited columns:
+## pad short rows with empty columns and drop any columns past the ninth.
+awk -F'\t'
+'{while(NF<9)\$0=\$0 FS""; if(NF>9)\$9=\$9 FS \$(NF--); print \$1,\$2,\$3,\$4,\$5,\$6,\$7,\$8,\$9}'
+OFS='\t' out/scaffolds_FINAL.agp 
+| cut -f1-9
+> out/scaffolds_FINAL.fixed.agp
+
     ]]></command>
     <inputs>
         <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/>
-        <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly.
-            BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from
-            the Bedtools package."/>
+        <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from the Bedtools package."/>
         <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/>
-        <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs"
-            help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/>
+        <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/>
         <conditional name="enzyme_conditional">
-            <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes.
-                The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual
-                sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you
-                use an enzyme-free prep, e.g. Omin-C.">
+            <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you use an enzyme-free prep, e.g. Omni-C.">
                 <option value="preconfigured">Preconfigured restriction enzymes</option>
                 <option value="specific">Enter a specific sequence</option>
             </param>
@@ -66,45 +74,26 @@
                     <option value="dovetail">Dovetail Chicago, Dovetail Hi-C or Phase: GATC</option>
                     <option value="arima1">Arima Hi-C 1.0: GATC, GANTC</option>
                     <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option>
+                    <option value="omnic">Dovetail Omni-C: enzyme-free prep</option>
                 </param>
             </when>
             <when value="specific">
-                <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)"
-                    help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
+                <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)" help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
                     <validator type="expression" message="Only alphabetical letters and the comma can be used in to define restriction enzym sequences.">value.replace(',', '').isalpha()</validator>
                 </param>
             </when>
         </conditional>
-        <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true"
-            help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will
-                potentially increase the number of joins, however it could also introduce additional misjoins"/>
-        <param name="clean" argument="-m" type="boolean" label="Clean Assembly" checked="false" truevalue='yes' falsevalue="no"  help="Set this option to 'yes' if you want to find misassemblies in input assembly" />
-        <param name="gensize" argument="-s" type="integer" label="Expected Genome Size" optional="true" help="Expected Genome size of the assembled genome. If not set, Salsa will estimate genome size." />
+        <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will potentially increase the number of joins, however it could also introduce additional misjoins"/>
+        <param name="clean" argument="-m" type="boolean" label="Clean Assembly" checked="false" truevalue="yes" falsevalue="no" help="Set this option to 'yes' if you want to find misassemblies in input assembly"/>
+        <param name="gensize" argument="-s" type="integer" label="Expected Genome Size" optional="true" help="Expected Genome size of the assembled genome. If not set, Salsa will estimate genome size."/>
     </inputs>
     <outputs>
         <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/>
-        <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/>
+        <data name="scaffolds_agp" format="agp" from_work_dir="out/scaffolds_FINAL.fixed.agp" label="${tool.name} on ${on_string}: agp output"/>
     </outputs>
     <tests>
         <test>
             <param name="fasta_in" value="test.fasta"/>
-            <param name="length" value="test.fai"/>
-            <param name="bed_file" value="test.bed"/>
-            <param name="gfa_file" value="test.gfa1"/>
-            <conditional name="enzyme_conditional">
-                <param name="enzyme_options" value="specific"/>
-                <param name="manual_enzyme" value="GATC,GANTC"/>
-            </conditional>
-            <param name="enzyme" value="GATC,GANTC"/>
-            <param name="cutoff" value="1000"/>
-            <param name="iter" value="3"/>
-            <param name="clean" value="yes"/>
-            <output name="scaffolds_fasta" file="out.fasta"/>
-            <output name="scaffolds_agp" file="out.agp"/>
-        </test>
-        <!--Test manual enzyme-->
-        <test>
-            <param name="fasta_in" value="test.fasta"/>
             <param name="bed_file" value="test.bed"/>
             <param name="gfa_file" value="test.gfa1"/>
             <conditional name="enzyme_conditional">
@@ -129,8 +118,23 @@
             <param name="cutoff" value="1000"/>
             <param name="iter" value="3"/>
             <param name="clean" value="yes"/>
-            <output name="scaffolds_fasta" file="out.fasta"/>
-            <output name="scaffolds_agp" file="out.agp"/>
+            <output name="scaffolds_fasta" file="arima1.fasta"/>
+            <output name="scaffolds_agp" file="arima1.agp"/>
+        </test>
+        <!-- Test the preconfigured Omni-C (enzyme-free) option, which runs SALSA with -e 'DNASE' -->
+        <test>
+            <param name="fasta_in" value="test.fasta"/>
+            <param name="bed_file" value="test.bed"/>
+            <param name="gfa_file" value="test.gfa1"/>
+            <conditional name="enzyme_conditional">
+                <param name="enzyme_options" value="preconfigured"/>
+                <param name="preconfigured_enzymes" value="omnic"/>
+            </conditional>
+            <param name="cutoff" value="1000"/>
+            <param name="iter" value="3"/>
+            <param name="clean" value="yes"/>
+            <output name="scaffolds_fasta" file="omnic.fasta"/>
+            <output name="scaffolds_agp" file="omnic.agp"/>
         </test>
     </tests>
     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/arima1.agp	Thu Aug 01 11:41:30 2024 +0000
@@ -0,0 +1,1 @@
+scaffold_1	1	7920	1	W	HiC_scaffold_1	1	7920	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/arima1.fasta	Thu Aug 01 11:41:30 2024 +0000
@@ -0,0 +1,100 @@
+>scaffold_1
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAAATCTGGTGAAGCTTTCTGCATGTGATGT
+GCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACA
+CTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGA
+CGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGC
+CATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCAC
+TGTGTCTCCGTGCCGAGCTAGATCTCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGC
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCTGGCATTGTAGGTACCAGACACTAACAAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCTCGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTGCCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/omnic.agp	Thu Aug 01 11:41:30 2024 +0000
@@ -0,0 +1,1 @@
+scaffold_1	1	7920	1	W	HiC_scaffold_1	1	7920	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/omnic.fasta	Thu Aug 01 11:41:30 2024 +0000
@@ -0,0 +1,100 @@
+>scaffold_1
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAAATCTGGTGAAGCTTTCTGCATGTGATGT
+GCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACA
+CTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGA
+CGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGC
+CATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCAC
+TGTGTCTCCGTGCCGAGCTAGATCTCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGT
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGAGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAG
+CAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGC
+ACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTG
+CCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACAC
+TTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGAC
+GACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCC
+ATCGCATTGTGTCTGGTGTGTAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACT
+GTGTCTCCGTGCCGTGCTAGATCTCCCCTAGCCGTAAGGTACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGG
+TATGTGTCATAATCTGGTGAAGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCGCATTGTGTCTGGTGTG
+TAGGTACCCAGACACTTAAGCAAACTCGATCGTTGTACACTTTGATGGGCAACCATCACTGTGTCTCCGTGCCGTGCTAG
+ATCTCCCCTAGCCGTAAGGCACTTTGAACGCCCCTTCGACGACGAGTTACAAGAGTGTGGTATGTGTCATAATCTGGTGA
+AGCTTTCTGCATGTGATGTGCCTTGTGTGGATCCGTGGCCATCTGGCATTGTAGGTACCAGACACTAACAAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCTCGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTGCCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
+CGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGT
+GTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCA
+CATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTAC
+CTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCT
+TAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTC
+ACCAGATTATGACACATACCACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCT
+AGCACGGCACGGAGACACAGTGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACA
+CACCAGACACAATGCGATGGCCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACC
+ACACTCTTGTAACTCGTCGTCGAAGGGGCGTTCAAAGTACCTTACGGCTAGGGGAGATCTAGCACGGCACGGAGACACAG
+TGATGGTTGCCCATCAAAGTGTACAACGATCGAGTTTGCTTAAGTGTCTGGGTACCTACACACCAGACACAATGCGATGG
+CCACGGATCCACACAAGGCACATCACATGCAGAAAGCTTCACCAGATTATGACACATACCACACTCTTGTAACTCGTCGT
--- a/test-data/out.agp	Thu May 19 14:17:24 2022 +0000
+++ b/test-data/out.agp	Thu Aug 01 11:41:30 2024 +0000
@@ -1,1 +1,1 @@
-scaffold_1	1	7920	1	W	HiC_scaffold_1	1	7920	+	
+scaffold_1	1	7920	1	W	HiC_scaffold_1	1	7920	+
--- a/test-data/test.fai	Thu May 19 14:17:24 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-HiC_scaffold_1	399920	16	80	81