Repository 'emboss_needle'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/emboss_needle

Changeset 0:2f0dc62d0a19 (2025-01-20)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/emboss commit 08f38b1f9b4241ba9037c64f732621efc628fd43
added:
emboss_needle.xml
macros.xml
test-data/1.fasta
test-data/2.fasta
test-data/emboss_needle_out.fasta
test-data/emboss_needle_out.markx10
test-data/emboss_needle_out.score
test-data/emboss_needleall_input1.fa
test-data/emboss_needleall_input2.fq
test-data/emboss_needleall_out.fasta
test-data/emboss_needleall_out.pair
test-data/emboss_needleall_out.score
b
diff -r 000000000000 -r 2f0dc62d0a19 emboss_needle.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/emboss_needle.xml Mon Jan 20 16:22:05 2025 +0000
[
@@ -0,0 +1,143 @@
+<tool id="emboss_needle" name="EMBOSS: needle" version="@VERSION@+galaxy0" profile="@PROFILE@">
+  <description>Needleman-Wunsch global alignment</description>
+  <macros>
+    <import>macros.xml</import>
+  </macros>
+  <expand macro="bio_tools" />
+  <expand macro="requirements" />
+  <version_command>needle -version</version_command>
+  <command detect_errors="exit_code"><![CDATA[
+## Globally align sequence A against sequence B with needle and write the
+## alignment, rendered in the selected format, to the single output dataset.
+needle
+-asequence '$asequence'
+-bsequence '$bsequence'
+-outfile '$out_file1'
+-gapopen $gapopen
+-gapextend $gapextend
+-brief $brief
+-aformat3 $out_format1
+-auto
+## The scoring matrix select is optional: when nothing is chosen no -datafile
+## is passed and needle falls back to its own default matrix.
+#if $datafile
+-datafile $datafile
+#end if
+## needle's -endweight switch defaults to off, and -endopen/-endextend are only
+## applied when it is on, so the switch has to be passed together with them.
+## NOTE(review): test-data/emboss_needle_out.fasta was presumably generated
+## without -endweight; regenerate it so the end-gap test reflects this.
+#if $endgap.endweight == 'yes'
+-endweight
+-endopen $endgap.endopen
+-endextend $endgap.endextend
+#end if
+]]></command>
+  <inputs>
+    <!-- The two sequence datasets to align; each may be FASTA or FASTQ. -->
+    <param type="data" argument="-asequence" label="Sequence 1" format="fasta,fastq"/>
+    <param type="data" argument="-bsequence" label="Sequence 2" format="fasta,fastq"/>
+
+    <!-- Parameter groups shared through macros.xml, in the order they appear in the form. -->
+    <expand macro="scoring_matrix" />
+    <expand macro="gap_penalties" />
+    <expand macro="endgap_penalties" />
+    <expand macro="param_brief" />
+
+    <!-- Selector for the alignment report format (out_format1). -->
+    <expand macro="choose_alignment_output_format" />
+  </inputs>
+  <outputs>
+    <!-- Single alignment dataset. It is declared as "needle"; the expanded
+         change_format rules switch the datatype according to out_format1. -->
+    <data format="needle" name="out_file1" label="${tool.name} on ${on_string}: alignment output">
+      <expand macro="change_alignment_output_format" />
+    </data>
+  </outputs>
+  <tests>
+    <!-- FASTA vs FASTA with default gap penalties, score-only output -->
+    <test>
+      <param name="asequence" value="2.fasta" />
+      <param name="bsequence" value="1.fasta" />
+      <param name="gapopen" value="10" />
+      <param name="gapextend" value="0.5" />
+      <param name="brief" value="yes" />
+      <param name="out_format1" value="score" />
+      <output name="out_file1" ftype="score" file="emboss_needle_out.score" />
+    </test>
+    <!-- FASTQ input; the markx10 header embeds the run date and dataset
+         paths, so a few differing lines are tolerated -->
+    <test>
+      <param name="asequence" value="emboss_needleall_input2.fq" />
+      <param name="bsequence" value="1.fasta" />
+      <param name="gapopen" value="10" />
+      <param name="gapextend" value="0.5" />
+      <param name="brief" value="yes" />
+      <param name="out_format1" value="markx10" />
+      <output name="out_file1" ftype="markx10" file="emboss_needle_out.markx10" lines_diff="10" />
+    </test>
+    <!-- FASTA output with a custom scoring matrix and end gap penalties -->
+    <test>
+      <param name="asequence" value="2.fasta" />
+      <param name="bsequence" value="1.fasta" />
+      <param name="gapopen" value="10" />
+      <param name="gapextend" value="0.5" />
+      <param name="datafile" value="EPAM30" />
+      <conditional name="endgap">
+        <param name="endweight" value="yes" />
+        <param name="endopen" value="13.37" />
+        <param name="endextend" value="2.5" />
+      </conditional>
+      <param name="brief" value="yes" />
+      <param name="out_format1" value="fasta" />
+      <output name="out_file1" ftype="fasta" file="emboss_needle_out.fasta" />
+    </test>
+  </tests>
+  <help><![CDATA[
+
+needle reads any two sequences of the same type (DNA or protein).
+
+This tool uses the Needleman-Wunsch global alignment algorithm to find the optimum alignment (including gaps) of two sequences when considering their entire length.
+
+- **Optimal alignment:** Dynamic programming methods ensure the optimal global alignment by exploring all possible alignments and choosing the best.
+
+- **The Needleman-Wunsch algorithm** is a member of the class of algorithms that can calculate the best score and alignment in on the order of mn steps (where 'm' and 'n' are the lengths of the two sequences).
+
+- **Gap open penalty:** [10.0 for any sequence] The gap open penalty is the score taken away when a gap is created. The best value depends on the choice of comparison matrix. The default value assumes you are using the EBLOSUM62 matrix for protein sequences, and the EDNAFULL matrix for nucleotide sequences. (Floating point number from 1.0 to 100.0)
+
+- **Gap extension penalty:** [0.5 for any sequence] The gap extension penalty is added to the standard gap penalty for each base or residue in the gap. This is how long gaps are penalized. Usually you will expect a few long gaps rather than many short gaps, so the gap extension penalty should be lower than the gap open penalty. An exception is where one or both sequences are single reads with possible sequencing errors, in which case you would expect many single base gaps. You can get this result by setting the gap open penalty to zero (or very low) and using the gap extension penalty to control gap scoring. (Floating point number from 0.0 to 10.0)
+
+You can view the original documentation here_.
+
+    .. _here: http://galaxy-iuc.github.io/emboss-5.0-docs/needle.html
+
+-----
+
+**Example**
+
+- Input File::
+
+    >hg18_dna range=chrX:151073054-151073136 5'pad=0 3'pad=0 revComp=FALSE strand=? repeatMasking=none
+    TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA
+    GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG
+
+- If both Sequence 1 and Sequence 2 take the above file as input, Gap open penalty equals 10.0, Gap extension penalty equals 0.5, Brief identity and similarity is set to Yes, and Output alignment file format is set to SRS pair, the output file is::
+
+    ########################################
+    # Program: needle
+    # Rundate: Mon Apr 02 2007 14:23:16
+    # Align_format: srspair
+    # Report_file: ./database/files/dataset_7.dat
+    ########################################
+
+    #=======================================
+    #
+    # Aligned_sequences: 2
+    # 1: hg18_dna
+    # 2: hg18_dna
+    # Matrix: EDNAFULL
+    # Gap_penalty: 10.0
+    # Extend_penalty: 0.5
+    #
+    # Length: 83
+    # Identity:      83/83 (100.0%)
+    # Similarity:    83/83 (100.0%)
+    # Gaps:           0/83 ( 0.0%)
+    # Score: 415.0
+    #
+    #=======================================
+
+    hg18_dna           1 TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA     50
+                       ||||||||||||||||||||||||||||||||||||||||||||||||||
+    hg18_dna           1 TTTATGTCTATAATCCTTACCAAAAGTTACCTTGGAATAAGAAGAAGTCA     50
+
+    hg18_dna          51 GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG     83
+                       |||||||||||||||||||||||||||||||||
+    hg18_dna          51 GTAAAAAGAAGGCTGTTGTTCCGTGAAATACTG     83
+
+    #---------------------------------------
+    #---------------------------------------
+  ]]></help>
+  <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r 2f0dc62d0a19 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,159 @@\n+<macros>\n+    <token name="@PROFILE@">23.1</token>\n+    <token name="@VERSION@">6.6.0</token>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@VERSION@">emboss</requirement>\n+            <requirement type="package" version="5.26">perl</requirement>\n+        </requirements>\n+    </xml>\n+    <xml name="bio_tools">\n+        <xrefs>\n+            <xref type="bio.tools">emboss</xref>\n+        </xrefs>\n+    </xml>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1016/S0168-9525(00)02024-2</citation>\n+            <citation type="doi">10.1101/gr.5578007</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+\n+    <xml name="scoring_matrix">\n+        <param argument="-datafile" type="select" optional="true" label="Scoring matrix" help="This is the scoring matrix used when comparing sequences. By default it is EBLOSUM62 (for proteins) or EDNAFULL (for nucleic sequences)">\n+            <option value="EBLOSUM30">EBLOSUM30</option>\n+            <option value="EBLOSUM35">EBLOSUM35</option>\n+            <option value="EBLOSUM40">EBLOSUM40</option>\n+            <option value="EBLOSUM45">EBLOSUM45</option>\n+            <option value="EBLOSUM50">EBLOSUM50</option>\n+            <option value="EBLOSUM55">EBLOSUM55</option>\n+            <option value="EBLOSUM60">EBLOSUM60</option>\n+            <option value="EBLOSUM62">EBLOSUM62</option>\n+            <option value="EBLOSUM62-12">EBLOSUM62-12</option>\n+            <option value="EBLOSUM65">EBLOSUM65</option>\n+            <option value="EBLOSUM70">EBLOSUM70</option>\n+            <option value="EBLOSUM75">EBLOSUM75</option>\n+            <option value="EBLOSUM80">EBLOSUM80</option>\n+            <option value="EBLOSUM85">EBLOSUM85</option>\n+            <option value="EBLOSUM90">EBLOSUM90</option>\n+            <option value="EBLOSUMN">EBLOSUMN</option>\n+            <option 
value="EDNAFULL">EDNAFULL</option>\n+            <option value="EDNAMAT">EDNAMAT</option>\n+            <option value="EDNASIMPLE">EDNASIMPLE</option>\n+            <option value="ENUC.4.2">ENUC.4.2</option>\n+            <option value="ENUC.4.4">ENUC.4.4</option>\n+            <option value="EPAM10">EPAM10</option>\n+            <option value="EPAM100">EPAM100</option>\n+            <option value="EPAM110">EPAM110</option>\n+            <option value="EPAM120">EPAM120</option>\n+            <option value="EPAM130">EPAM130</option>\n+            <option value="EPAM140">EPAM140</option>\n+            <option value="EPAM150">EPAM150</option>\n+            <option value="EPAM160">EPAM160</option>\n+            <option value="EPAM170">EPAM170</option>\n+            <option value="EPAM180">EPAM180</option>\n+            <option value="EPAM190">EPAM190</option>\n+            <option value="EPAM20">EPAM20</option>\n+            <option value="EPAM200">EPAM200</option>\n+            <option value="EPAM210">EPAM210</option>\n+            <option value="EPAM220">EPAM220</option>\n+            <option value="EPAM230">EPAM230</option>\n+            <option value="EPAM240">EPAM240</option>\n+            <option value="EPAM250">EPAM250</option>\n+            <option value="EPAM260">EPAM260</option>\n+            <option value="EPAM270">EPAM270</option>\n+            <option value="EPAM280">EPAM280</option>\n+            <option value="EPAM290">EPAM290</option>\n+            <option value="EPAM30">EPAM30</option>\n+            <option value="EPAM300">EPAM300</option>\n+            <option value="EPAM310">EPAM310</option>\n+            <option value="EPAM320">EPAM320</option>\n+            <option value="EPAM330">EPAM330</option>\n+            <option value="EPAM340">EPAM340</option>\n+            <option value="EPAM350">EPAM350</option>\n+            <option value="EPAM360">EPAM360</option>\n+            <option value="EPAM370">EPAM370</option>\n+            <option 
value="EPAM380">EPAM380</option>\n+            <option value="EPAM390">EPAM390</option>\n+       '..b'ue="EPAM440">EPAM440</option>\n+            <option value="EPAM450">EPAM450</option>\n+            <option value="EPAM460">EPAM460</option>\n+            <option value="EPAM470">EPAM470</option>\n+            <option value="EPAM480">EPAM480</option>\n+            <option value="EPAM490">EPAM490</option>\n+            <option value="EPAM50">EPAM50</option>\n+            <option value="EPAM500">EPAM500</option>\n+            <option value="EPAM60">EPAM60</option>\n+            <option value="EPAM70">EPAM70</option>\n+            <option value="EPAM80">EPAM80</option>\n+            <option value="EPAM90">EPAM90</option>\n+            <option value="SSSUB">SSSUB</option>\n+        </param>\n+    </xml>\n+\n+    <xml name="param_brief">\n+        <param argument="-brief" type="select" label="Brief identity and similarity">\n+            <option value="yes">Yes</option>\n+            <option value="no">No</option>\n+        </param>\n+    </xml>\n+\n+    <xml name="gap_penalties">\n+        <param argument="-gapopen" type="float" value="10.0" min="1.0" max="100.0" label="Gap open penalty" />\n+        <param argument="-gapextend" type="float" value="0.5" min="0.0" max="10.0"  label="Gap extension penalty" />\n+    </xml>\n+\n+    <xml name="endgap_penalties">\n+        <conditional name="endgap">\n+            <param argument="-endweight" type="select" label="Apply end gap penalties?" help="">\n+                <option value="yes">yes</option>\n+                <option value="no" selected="true">no</option>\n+            </param>\n+            <when value="yes">\n+                <param argument="-endopen" type="float" value="10.0" min="0.0" max="100.0" label="Penalty for creation of and end gap." help="The best value depends on the choice of comparison matrix. 
The default value assumes you are using the EBLOSUM62 matrix for protein sequences, and the EDNAFULL matrix for nucleotide sequences."/>\n+                <param argument="-endextend" type="float" value="0.5" min="0.0" max="10.0" label="The end gap extensionpenalty" help="this penalty is added to the end gap penalty for each base or residue in the end gap."/>\n+            </when>\n+            <when value="no"/>\n+        </conditional>\n+    </xml>\n+\n+    <xml name="choose_alignment_output_format">\n+        <param name="out_format1" type="select" label="Output alignment file format">\n+            <option value="srspair">SRS pair (p)</option>\n+            <option value="simple">Simple (m)</option>\n+            <option value="fasta">FASTA (m)</option>\n+            <option value="msf">MSF (m)</option>\n+            <option value="srs">SRS (m)</option>\n+            <option value="pair">Pair (p)</option>\n+            <option value="markx0">Markx0 (p)</option>\n+            <option value="markx1">Markx1 (p)</option>\n+            <option value="markx2">Markx2 (p)</option>\n+            <option value="markx3">Markx3 (p)</option>\n+            <option value="markx10">Markx10 (p)</option>\n+            <option value="score">Score (p)</option>\n+        </param>\n+    </xml>\n+\n+    <xml name="change_alignment_output_format">\n+        <change_format>\n+            <when input="out_format1" value="fasta" format="fasta"/>\n+            <when input="out_format1" value="score" format="score"/>\n+            <when input="out_format1" value="pair" format="pair"/>\n+            <when input="out_format1" value="srspair" format="srspair"/>\n+            <when input="out_format1" value="simple" format="simple"/>\n+            <when input="out_format1" value="srs" format="srs"/>\n+            <when input="out_format1" value="msf" format="msf"/>\n+            <when input="out_format1" value="markx0" format="markx0"/>\n+            <when input="out_format1" value="markx1" 
format="markx1"/>\n+            <when input="out_format1" value="markx2" format="markx2"/>\n+            <when input="out_format1" value="markx3" format="markx3"/>\n+            <when input="out_format1" value="markx10" format="markx10"/>\n+        </change_format>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.fasta Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,2 @@\n+>hg17\n+gtttgccatcttttgctgctctagggaatccagcagctgtcaccatgtaaacaagcccaggctagaccaGTTACCCTCATCATCTTAGCTGATAGCCAGCCAGCCACCACAGGCAtgagtcaggccatattgctggacccacagaattatgagctaaataaatagtcttgggttaagccactaagttttaggcatagtgtgttatgtaTCTCACAAACATATAAGACTGTGTGTTTGTTGACTGGAGGAAGAGATGCTATAAAGACCACCTTTTAAAACTTCCC-------------------------------AAATACT-GCCACTGATGTCCTG-----ATGGAGGTA-------TGAA-------------------AACATCCACTAAAATTTGTGGTTTATTCATTTTTCATTATTTTGTTTAAGGAGGTCTATAGTGGAAGAGGGAGATATTTGGggaaatt---ttgtatagactagctttcacgatgttagggaattattattgtgtgataatggtcttgcagttaca-cagaaattcttccttattttttgggaa---gcaccaaag----tagggat---aaaatgtcatgatgtgtgcaatacactttaaaatgtttttg-----ccaaaataatt----------------aatgaagc--aaatatggaaa-ataataattattaaatctaggtgatgggtatattgtagttcactatagtattgcacacttttctgtatgtttaaatttttcatttaaaaaaaaactttgagc-----tagacaccaggctatgagctaggagcatagcaatgaccaa----------------------------------------------------------------------------------------------atagactcctaccaa--------------------------------------------------ctc-aaagaatgcacattctCTGGGAAACATGTTTCCATTAGGAAGCCTCGAATGCAATGTGACTGTGGTCTCCAGGACCTG-TGTGATCCTGGCTTTTCCTGTTCCCTCCG---CATCATCACTGCAGGTGTGTTTTCCCAAGTTTTAAACATTTA------CCTTCCCAGTGGCCTTGCGTCTAGAGGAATCCCTGTATAGTGGT-ACATGAATATAACACATAACAAA-AATCATCTCTATGGTGTGTGTTGTTCCTGGGGTTCAattcagcaaattttccctg-ggcacccatgtgttcttggcactggaaaagtaccgggactgaaacagttgatggccca-atccctgtcctct---taaaacctaagggaggagaTGGAAAGGGG-CACCCAACCCAGACTGAGAGACAGGAATTAGCTGCAAGGGGAACTAGGAAAAGCTTCTTTA---AGGATGGAGAGGCCCTAGTGGAAT-GGGGAGATTCTTCCGGGAGAAGCGATGGATGCACAGTTGGGCATCCCCACAGACGGACTGGAAAGAAAAAAGGCCTGGAGGAATCAATGTG-------CAATGTATGTGTGTTCCCTGGTTcaagggctgg-gaactttctcta-aagggccaggtagaaaacattttaggctttctaagccaag--gcaaaat-tgaggatattacatgggtacttatacaacaagaataaacaatt---tacacaattttttgttgacagaattcaaaactttat----agacacagaaatgcaaatttcctgtaattttcccat-gagaactattcttct--tttgttttgttttgcgacAGGGTTGCGCtgatcctcccgcctcagtctccctaagtgctgagatgttgcaggaagtcagggaccccgaacagagagatcggctggagccgtggcagaggaacataaattttgaagatttcattttaatatggacacttatca
gttcccaaataatacttttataattttttatgcctgtctttgctttaatctcttaatcctgttatcttcataagctaaggatgtacgtcacctcaggaccactgtgataattgtgttaactgtacagattgattgcaaaacatgtgtgtttgaacaatatgaaatcagtgcaccttgaaaaagagcagaataacagcaatttttagggaacaagggaagacaactataaggtctgactgcctgcggggtcgggcaaagggagccatatttttcttcttgcagagagcctataaatagacctgcaagtaggagagatattgctaatttcttttgctagcatggaatattaatattaacaccctgggaaaggaatgcattcctggggggaggtctataaatggccgctctgggaatgtctatcctacgcaatggagataaggactgagatacgccctggtctcctgcagtaccctcaggcttactagggtggtgaaaaactccgccctggtaaatttgtggtcagaccagttttctgctctcgaacactgttttctgttgtttaagatgtttatcaagacaatacgtgcaccgctgaacacagacccttatcagtagttctcctttttgccctttgaagcatgtgatctactccctgttttacaccccctcaccttttgaaacccttaataaaaaacttgctggttt-gaggctcaggtgggcatcacagtactaccgatatgtgatgtcacccccggcggcccagctgtaaaattcctctctttgtactctctctctttatttctcagccagctgacacttatggaaaatagaaagaacctacgttgaaatattgggggcaggttcccccaataTCTGGTGCCCAACGTGGGAtactgagattacaagcatgagccactgcatctggcctcttcttttgatttttttttttcaaacttttacaaatgtagaaaccattcttagcttttgggcattaccaaacccggcagtgg-caggctcggttcaccaacgtcatttgcagttccccgCTTTATGTTATGGgttttgttttgttttgtttttttt-attgagacagagtttcactcttgttgcccaggctgtagtgcaatggtctgatcttggctcactgcaacctccacttcccaggttcaagccattctcctgcctcagcctctcaagtagctgggattacagacactcaccaccacacctggctaattttgtatttttagtagagatgaggtttcaccatgttggccaggctggtctcgaaatcctgacctcaggtgatccacccaccttggcctcccaaagtgctgggattacaggcttgagctaccacgcctggctGGGTTGGTTCTCAATGGAGTGGTTTGTTTTTGGAGCTGCTCT-GCGCAGtggggaccagaataggcctg-------------------ggttcctagcccattgctattcctt----accagctgtggattctaaggaaagtcatttaacctcgctggaccttag-attcctcatccctgaaGCCCAAGGGTaaaacaaaacaaaacaaaacaaaacaaaccaaCCCATCATGTAAAGCGGGGAACTACAAACGATACAGGTGAAACATGCCTACCACACCACTCACAGGCT--ATGATGACAAAAACGTGGCTACATCTGGGACCACCCCCCAACCCCCACTTTGTACGTAGGAAATACGGAGTTGAGGATGGAGACCCACAGTATGTCCAGAGTGTCCCCAAAGGCCACAGTGCCCGCCTGGAGCCCTCCAGAGAGCGTGCACTCCCTGGGGTGCCAGCCAGAGACAACTTGCCCTGAGGCTTGGAACTCGATTCTCCGCGTGCCAGAGAAGGGGTGGGACTTCAGAACCCCCAACCCCGCAATCTGGGTCGGGGAGCCTGGCGCACTGCGGGCCGCTCCCTCTAACCCTGGGCTTCCCTG------GCGTCCAGGGCCGTCGG---
---'..b'gataatggggatatcacacacta-ttcacaaggttgttatgaggcctaaattagctaaagcaATTGAATCCTCCTTACCCCCTGCATGGAGCTCTCTGGAGACTTCCACGTCTCCTGGTCATTGTGGGTGTCTTATGGTA-GTCTTGGGCAGTTAGGGAGAAGTTAGGTGTCTGGAAGCAAAGATGGCTCAGAACTAGATAGAGTC-TTGGGCATTTTATA-GATAAAAACTCTT--GTCTCCtttaaaaataataaaaaaaaattaGCTGGGCATATTAGCCACTCAGCAAGACTGCACGTGATAGATCCCGAGTGCCCCACCTTGGGTGGTGTAATACACAATATCACGGGAGCCCCGGGTAGTAACCACGGAGGTGTCAGCCTCAGTGCTGTGGGCAGATG-GATGGGGAGAGCC--TCCCGG-AACTGGAGTCACTGGAGCA----------------------------GGGTTGGGGGGCCTCACTGAGGGTACGGCCTTGATCTCTAAGGAGGAGGGACTGCCTGGAAAAGC-TGACTGGGAGGGAGGACTCGGCTGGGGGTAGAAGGGA----------CTAGGGAAGGCTGGGGGTGGGGGTGCTTATGGAGGACCTCAGATGCCTGGGGAACAGACTCCACTAAATAAAACATATGAAACCATGGCTGGTTCTTCAGCAGAGGCCATGTAGAGAAAGGAATGACCTAGGAAAGTTGGCCTGGAAGTGGAGGGAAGGATGGTGTGGGAAAAGCAGGAA--------TCTCGGAGACCAGCTTAGAGGCTTGGCAGTCACCTGGGTGCAGG-ATACAAGGGCCTGAGCCAAAGTGGTGAGGGAGGGTGGAAGGAGGCAGCCCAGAGAATGACCCTCCATGCCCACGGGGAAGGCAGAGGGCTCT-GAGAGCGA--TTCCTCCCACATG-CT-GAGCACTTGTTCTCCCTCTTCCTCCTGCATAGCAGTCAGTCTCCTCCAAACAGAAAGTCACCGGTTTGGACTTCATTCCTGGGCTCCACCCCATCCTGACCTTATCCAAGATGGACCAGACACTGGCAGTCTACCAACAGATCCTCACCAGTATGCCTTCCAGAAACGTGATCCAAATATCCAACGACCTGGAGAACCTCCGGGATCTTCTTCACGTGCTGGCCTTCTCTAAGAGCTGCCACTTGCCCTGGGCCAGTGGCCTGGAGACCTTGGACAGCCTGGGGGGTGTCCTGGAAGCTTCAGGCTACTCCACAGAGGTGGTGGCCCTGAGCAGGCTGCAGGGGTCTCTGCAGGACATGCTGTGGCAGCTGGACCTCAGCCCTGGGTGCTGAGGCCTTGAAGGTCACTCTTCCTGCAAGGACTACGTTAAGGGAAGGAACTCTGGCTTCCAGGTATCTCCAGGATTGAAGAGCATTGCATGGACACCCCTTATCCAGGACTCTGTCAATT--TCCCTGACTCCTCTAAGCCACTCTTCCAAAGGCATAAGACCCTAAGCCTCCTTTTGCTTGAAACCAAAGATATATACACAGGATCCTATTCTCACCAGGAAGGGGG-TCCACCC-AGCAAAGAGTGGGCTGCATCTGGGATTCCCACCAAGGTCTTCAGCCATCA---ACAAGAGTTGTCTTGTCCCCTCT-TGACCCATCT-----------------CCCCCTCACTGAATGCCTCAATGTGACCAGGGGTGATTTCAGAGAGGGCAGAGGGGTAGGCAGAGCCTTTGGATGACCA--GAACAAGGTTCCCTCTGAGAATTCCAAGGAGTTCCATGAAGACCACATCCACACACG--CAGGAACTCCC--AGCAACACAAGCTGGAA---GCACATGTTTATTTATTCTGCATTTTATTCTGGATGGATTTGAAGCAAAGCACCAGCTTCTCCAGGCTCTTTGGGGTCAGCCAGGGCCAGGGGTCTCCCTGGAGTGCAGTTTCCAATCCCATAGATGGGTC-TGGCTGAGCTGAACCC
A---TTTTGAGTGACT----CGAGGGTTGGG-TTCATCTGAGCAAGAGCTGGCAAAGGTGGCTCTCCAGTTAGTTCTCTCGTAACTGGTTTCATTTCTACTGTGACTGATGTTACATCACAGTGTTTGCAATGGTGTTGCCCTGAGTGGATCTCCAAGGACCAGGTTATTTTAAAA---AGATTTGTTTTGTCAAGTGTCATATGTAGGTGTCTGCACCCAGGGGTGGG-GAATGTTTGGGCAGAAGGGAGAAGGATCTAGAATGTGTTTTCTGAATAACATTTGTGTGGTGGGTTCTTTGGAAGGAGTGAGA-TCATTTTCTTATCTTCTGCAATTGCTTAGGATGTTTTTCATGAAAA------------TAGCTCTTTCAG-GGGGGTTGTGAGGCCTGGCCAGGCACCCCCTGGAGAGAAGTTTCTGGCCCTGGCTGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAAAGGGCTGAAAGCCATTTGTTGGGGCAGTGGTAAGCTCTGGCTTTCTCCGACTGCTAGGGAGTGGTCTTTCCTATCATGGAGTGACGGTCCCACACTGGTGACTGCGATCTTCAGAGCAGGGGTCCTTGGTGT-GACCCTCTGAATGGTCCAGGGTTGATCACACTCTGGGTTTATTACATGGCAG-----TGTTCCTATTTGGGGCTTGCATGCCAAATTGTAGTTCTTGTCTGATTGGCTCACCC-AAGCAAGGCCAAAATTACCAAAAATCTTGGGGGG--TTTTTACTC-CAGTGGTGAAGAAAACTCCTTTAGCAGG-TGGTCCTGAGACCT-GACAAGCACTGCTAGGCGAGTGCCAGGACTCCCCAGGCCAGGCCACCAGGATGGCCCTTCCCACTGGAGGTCACATTCAGGAAGATGAAAGAGGAGGTTTGGGGTCTGCCACCATCCTGCTGCTGTGTTTTTGCTATCACACAGTGGGTGGTGGATCTGTCCAAGGAAACTTGAATCAAAGCAGTTAAC-TTTAAGactgagcacctgcttcatgctcagccctgactggtgctataggctggagaagctcacccaataaacattaagatt-gaggcctgccctcagggatcttgcattcccagtggTCAAACC-GCACTCACCCATGTGCCAAGGTGGGGTA-TTTACCACAGCAG--CTGAACAGCCAAATGCATGGTGCAGTTGACAGCAGGTGGGAAATGGTATGAGCTGAGGGGGGCCGTGCCCAGGGGCCCACAGG-GAACCCTGCTTGCACTTTGTAACATGTTTA-----CTTTTCagggcatcttagctt---ctatta-----tagccacatccctttga---aacaagataactgagaatttaaaaataagaa-----aata--TGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAAATGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCATGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAATAA
AACGCA\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.fasta Mon Jan 20 16:22:05 2025 +0000
b
@@ -0,0 +1,11 @@
+>Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;
+gttcgatgcc taaaatacct tcttttgtcc ctacacagac cacagttttc ctaatggctt
+tacaccgact agaaattctt gtgcaagcac taattgaaag cggttggcct agagtgttac
+cggtttgtat agctgagcgc gtctcttgcc ctgatcaaag gttcattttc tctactttgg
+aagacgttgt ggaagaatac aacaagtacg agtctctccc ccctggtttg ctgattactg
+gatacagttg taataccctt cgcaacaccg cgtaactatc tatatgaatt attttccctt
+tattatatgt agtaggttcg tctttaatct tcctttagca agtcttttac tgttttcgac
+ctcaatgttc atgttcttag gttgttttgg ataatatgcg gtcagtttaa tcttcgttgt
+ttcttcttaa aatatttatt catggtttaa tttttggttt gtacttgttc aggggccagt
+tcattattta ctctgtttgt atacagcagt tcttttattt ttagtatgat tttaatttaa
+aacaattcta atggtcaaaa a
\ No newline at end of file
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needle_out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needle_out.fasta Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,676 @@\n+>Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+--------------------------------------------------------gtt-\n+-c----------gatgccta-aaataccttcttttg-----tcccta-------cacaga\n+-------------------------ccac-------------------------------\n+-------------------agt--------------------------------------\n+--tttc-c--------------------------taatgg-ct-----ttacaccgacta\n+gaaattct-----------------------------------------------tgtgc\n+aa-gcac---------------------taattga--aagc-------------------\n+-----------ggt--tgg-------------c-ctagagtgttac--------cggttt\n+gtat---------------------------agc--------------tgagc-----gc\n+-----------------gtctcttgcc--ctgatcaaaggtt-cattttctctactttgg\n+aagacgttgt--------ggaa-----gaatacaacaagtacgagtc-----tctcc---\n+-cc--------cctgg--tttgctgat----------------tactgg-----atac--\n+-agttgtaatac-----ccttc-----------gcaac----------------------\n+-ac----------cgcgtaact------atctatatg----------------------a\n+att------attttccct-------------ttatt--------atatgt----------\n+----agtaggt-------------tcgtctttaatctt----------------------\n+-cc----------------------ttta---gcaag-----------------tctttt\n+--------------------------------------------------------actg\n+tt----ttc--------------------------gacc--------tcaatgttca---\n+------tgttc-----tt-----------------------------aggttg------t\n+ttt---------------------------ggataat--atgcggt--------------\n+------cagttt----aatcttcgttgtt-tc----ttc------tta---------aaa\n+t----attt------att----catg-----------------gtttaatttt------t\n+ggtt-----------------------------tgtact----tgt---------tcagg\n+ggcc----------agttc---------------------------attat-----ttac\n+tctgtttgtat--ac----agcagtt-----------ctttt---atttttagtatg---\n+---at---tttaat---ttaaaacaatt-----------cta---atg---gtca-----\n+-----------aaaa-----------------
----------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+----------------
--------------------------------------------\n+-------------------'..b'ttccttactgtctttaagc\n+cttgttttcatcatctggataatggggatatcacacactattcacaaggttgttatgagg\n+cctaaattagctaaagcaATTGAATCCTCCTTACCCCCTGCATGGAGCTCTCTGGAGACT\n+TCCACGTCTCCTGGTCATTGTGGGTGTCTTATGGTAGTCTTGGGCAGTTAGGGAGAAGTT\n+AGGTGTCTGGAAGCAAAGATGGCTCAGAACTAGATAGAGTCTTGGGCATTTTATAGATAA\n+AAACTCTTGTCTCCtttaaaaataataaaaaaaaattaGCTGGGCATATTAGCCACTCAG\n+CAAGACTGCACGTGATAGATCCCGAGTGCCCCACCTTGGGTGGTGTAATACACAATATCA\n+CGGGAGCCCCGGGTAGTAACCACGGAGGTGTCAGCCTCAGTGCTGTGGGCAGATGGATGG\n+GGAGAGCCTCCCGGAACTGGAGTCACTGGAGCAGGGTTGGGGGGCCTCACTGAGGGTACG\n+GCCTTGATCTCTAAGGAGGAGGGACTGCCTGGAAAAGCTGACTGGGAGGGAGGACTCGGC\n+TGGGGGTAGAAGGGACTAGGGAAGGCTGGGGGTGGGGGTGCTTATGGAGGACCTCAGATG\n+CCTGGGGAACAGACTCCACTAAATAAAACATATGAAACCATGGCTGGTTCTTCAGCAGAG\n+GCCATGTAGAGAAAGGAATGACCTAGGAAAGTTGGCCTGGAAGTGGAGGGAAGGATGGTG\n+TGGGAAAAGCAGGAATCTCGGAGACCAGCTTAGAGGCTTGGCAGTCACCTGGGTGCAGGA\n+TACAAGGGCCTGAGCCAAAGTGGTGAGGGAGGGTGGAAGGAGGCAGCCCAGAGAATGACC\n+CTCCATGCCCACGGGGAAGGCAGAGGGCTCTGAGAGCGATTCCTCCCACATGCTGAGCAC\n+TTGTTCTCCCTCTTCCTCCTGCATAGCAGTCAGTCTCCTCCAAACAGAAAGTCACCGGTT\n+TGGACTTCATTCCTGGGCTCCACCCCATCCTGACCTTATCCAAGATGGACCAGACACTGG\n+CAGTCTACCAACAGATCCTCACCAGTATGCCTTCCAGAAACGTGATCCAAATATCCAACG\n+ACCTGGAGAACCTCCGGGATCTTCTTCACGTGCTGGCCTTCTCTAAGAGCTGCCACTTGC\n+CCTGGGCCAGTGGCCTGGAGACCTTGGACAGCCTGGGGGGTGTCCTGGAAGCTTCAGGCT\n+ACTCCACAGAGGTGGTGGCCCTGAGCAGGCTGCAGGGGTCTCTGCAGGACATGCTGTGGC\n+AGCTGGACCTCAGCCCTGGGTGCTGAGGCCTTGAAGGTCACTCTTCCTGCAAGGACTACG\n+TTAAGGGAAGGAACTCTGGCTTCCAGGTATCTCCAGGATTGAAGAGCATTGCATGGACAC\n+CCCTTATCCAGGACTCTGTCAATTTCCCTGACTCCTCTAAGCCACTCTTCCAAAGGCATA\n+AGACCCTAAGCCTCCTTTTGCTTGAAACCAAAGATATATACACAGGATCCTATTCTCACC\n+AGGAAGGGGGTCCACCCAGCAAAGAGTGGGCTGCATCTGGGATTCCCACCAAGGTCTTCA\n+GCCATCAACAAGAGTTGTCTTGTCCCCTCTTGACCCATCTCCCCCTCACTGAATGCCTCA\n+ATGTGACCAGGGGTGATTTCAGAGAGGGCAGAGGGGTAGGCAGAGCCTTTGGATGACCAG\n+AACAAGGTTCCCTCTGAGAATTCCAAGGAGTTCCATGAAGACCACATCCACACACGCAGG\n+AACTCCCAGCAACACAAGCTGGAAGCACATGTTTATTTATTCTGCATTTTATTCTGGATG\n+GATTTGAAGCAAAGCAC
CAGCTTCTCCAGGCTCTTTGGGGTCAGCCAGGGCCAGGGGTCT\n+CCCTGGAGTGCAGTTTCCAATCCCATAGATGGGTCTGGCTGAGCTGAACCCATTTTGAGT\n+GACTCGAGGGTTGGGTTCATCTGAGCAAGAGCTGGCAAAGGTGGCTCTCCAGTTAGTTCT\n+CTCGTAACTGGTTTCATTTCTACTGTGACTGATGTTACATCACAGTGTTTGCAATGGTGT\n+TGCCCTGAGTGGATCTCCAAGGACCAGGTTATTTTAAAAAGATTTGTTTTGTCAAGTGTC\n+ATATGTAGGTGTCTGCACCCAGGGGTGGGGAATGTTTGGGCAGAAGGGAGAAGGATCTAG\n+AATGTGTTTTCTGAATAACATTTGTGTGGTGGGTTCTTTGGAAGGAGTGAGATCATTTTC\n+TTATCTTCTGCAATTGCTTAGGATGTTTTTCATGAAAATAGCTCTTTCAGGGGGGTTGTG\n+AGGCCTGGCCAGGCACCCCCTGGAGAGAAGTTTCTGGCCCTGGCTGACCCCAAAGAGCCT\n+GGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAAAGGGCTGAAAGCCA\n+TTTGTTGGGGCAGTGGTAAGCTCTGGCTTTCTCCGACTGCTAGGGAGTGGTCTTTCCTAT\n+CATGGAGTGACGGTCCCACACTGGTGACTGCGATCTTCAGAGCAGGGGTCCTTGGTGTGA\n+CCCTCTGAATGGTCCAGGGTTGATCACACTCTGGGTTTATTACATGGCAGTGTTCCTATT\n+TGGGGCTTGCATGCCAAATTGTAGTTCTTGTCTGATTGGCTCACCCAAGCAAGGCCAAAA\n+TTACCAAAAATCTTGGGGGGTTTTTACTCCAGTGGTGAAGAAAACTCCTTTAGCAGGTGG\n+TCCTGAGACCTGACAAGCACTGCTAGGCGAGTGCCAGGACTCCCCAGGCCAGGCCACCAG\n+GATGGCCCTTCCCACTGGAGGTCACATTCAGGAAGATGAAAGAGGAGGTTTGGGGTCTGC\n+CACCATCCTGCTGCTGTGTTTTTGCTATCACACAGTGGGTGGTGGATCTGTCCAAGGAAA\n+CTTGAATCAAAGCAGTTAACTTTAAGactgagcacctgcttcatgctcagccctgactgg\n+tgctataggctggagaagctcacccaataaacattaagattgaggcctgccctcagggat\n+cttgcattcccagtggTCAAACCGCACTCACCCATGTGCCAAGGTGGGGTATTTACCACA\n+GCAGCTGAACAGCCAAATGCATGGTGCAGTTGACAGCAGGTGGGAAATGGTATGAGCTGA\n+GGGGGGCCGTGCCCAGGGGCCCACAGGGAACCCTGCTTGCACTTTGTAACATGTTTACTT\n+TTCagggcatcttagcttctattatagccacatccctttgaaacaagataactgagaatt\n+taaaaataagaaaataTGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCC\n+ATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCC\n+ATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAAATGACC\n+CCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCATGAC\n+CCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGAC\n+CCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGAT\n+GCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGAT\n+GCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGAT\n+G
CTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTGAT\n+GCTTTGCTTCAAATCCATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAA\n+TAAAACGCA\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needle_out.markx10
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needle_out.markx10 Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,866 @@\n+########################################\n+# Program: needle\n+# Rundate: Mon 20 Jan 2025 11:47:46\n+# Commandline: needle\n+#    -asequence /tmp/saskia/tmpsg97cix0/files/8/d/8/dataset_8d81948c-002a-4a33-ae33-6042793fd219.dat\n+#    -bsequence /tmp/saskia/tmpsg97cix0/files/f/b/9/dataset_fb94eaec-e786-4645-99b5-6936b8d9a907.dat\n+#    -outfile /tmp/saskia/tmpsg97cix0/job_working_directory/000/15/outputs/dataset_6b349e2c-cff0-4ae4-a35c-c035f1bcf9bd.dat\n+#    -gapopen 10.0\n+#    -gapextend 0.5\n+#    -brief yes\n+#    -aformat3 markx10\n+#    -auto\n+# Align_format: markx10\n+# Report_file: /tmp/saskia/tmpsg97cix0/job_working_directory/000/15/outputs/dataset_6b349e2c-cff0-4ae4-a35c-c035f1bcf9bd.dat\n+########################################\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: FC12044_91407_8_200_406_24\n+# 2: hg17\n+# Matrix: EDNAFULL\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 20141\n+# Identity:      21/20141 ( 0.1%)\n+# Similarity:    21/20141 ( 0.1%)\n+# Gaps:       20116/20141 (99.9%)\n+# Score: 60.5\n+# \n+#\n+#=======================================\n+\n+>>>FC12044_91407_8_200_406_24, 25 nt vs hg17, 20141 nt\n+; mp_name: EMBOSS\n+; mp_ver: 6.6.0.0\n+; pg_name: needle\n+; pg_ver: 6.6.0.0\n+; pg_matrix: EDNAFULL\n+; pg_gap-pen: -10.0 -0.5\n+>>#1\n+; sw_score: 60.5\n+; sw_ident: 0.001\n+; sw_overlap: 20141\n+>FC12044_91407_8_200_406_24 ..\n+; sq_len: 25\n+; sq_type: D\n+; al_start: 1\n+; al_stop: 25\n+; al_display_start: 
1\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+-----------------------------------
---------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+----GTTAGCTCC-------CAC------------CTTAAGATGTTTA--\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+-------------------'..b'TCCTTACCCCCTGCATGGAGC\n+TCTCTGGAGACTTCCACGTCTCCTGGTCATTGTGGGTGTCTTATGGTAGT\n+CTTGGGCAGTTAGGGAGAAGTTAGGTGTCTGGAAGCAAAGATGGCTCAGA\n+ACTAGATAGAGTCTTGGGCATTTTATAGATAAAAACTCTTGTCTCCttta\n+aaaataataaaaaaaaattaGCTGGGCATATTAGCCACTCAGCAAGACTG\n+CACGTGATAGATCCCGAGTGCCCCACCTTGGGTGGTGTAATACACAATAT\n+CACGGGAGCCCCGGGTAGTAACCACGGAGGTGTCAGCCTCAGTGCTGTGG\n+GCAGATGGATGGGGAGAGCCTCCCGGAACTGGAGTCACTGGAGCAGGGTT\n+GGGGGGCCTCACTGAGGGTACGGCCTTGATCTCTAAGGAGGAGGGACTGC\n+CTGGAAAAGCTGACTGGGAGGGAGGACTCGGCTGGGGGTAGAAGGGACTA\n+GGGAAGGCTGGGGGTGGGGGTGCTTATGGAGGACCTCAGATGCCTGGGGA\n+ACAGACTCCACTAAATAAAACATATGAAACCATGGCTGGTTCTTCAGCAG\n+AGGCCATGTAGAGAAAGGAATGACCTAGGAAAGTTGGCCTGGAAGTGGAG\n+GGAAGGATGGTGTGGGAAAAGCAGGAATCTCGGAGACCAGCTTAGAGGCT\n+TGGCAGTCACCTGGGTGCAGGATACAAGGGCCTGAGCCAAAGTGGTGAGG\n+GAGGGTGGAAGGAGGCAGCCCAGAGAATGACCCTCCATGCCCACGGGGAA\n+GGCAGAGGGCTCTGAGAGCGATTCCTCCCACATGCTGAGCACTTGTTCTC\n+CCTCTTCCTCCTGCATAGCAGTCAGTCTCCTCCAAACAGAAAGTCACCGG\n+TTTGGACTTCATTCCTGGGCTCCACCCCATCCTGACCTTATCCAAGATGG\n+ACCAGACACTGGCAGTCTACCAACAGATCCTCACCAGTATGCCTTCCAGA\n+AACGTGATCCAAATATCCAACGACCTGGAGAACCTCCGGGATCTTCTTCA\n+CGTGCTGGCCTTCTCTAAGAGCTGCCACTTGCCCTGGGCCAGTGGCCTGG\n+AGACCTTGGACAGCCTGGGGGGTGTCCTGGAAGCTTCAGGCTACTCCACA\n+GAGGTGGTGGCCCTGAGCAGGCTGCAGGGGTCTCTGCAGGACATGCTGTG\n+GCAGCTGGACCTCAGCCCTGGGTGCTGAGGCCTTGAAGGTCACTCTTCCT\n+GCAAGGACTACGTTAAGGGAAGGAACTCTGGCTTCCAGGTATCTCCAGGA\n+TTGAAGAGCATTGCATGGACACCCCTTATCCAGGACTCTGTCAATTTCCC\n+TGACTCCTCTAAGCCACTCTTCCAAA
GGCATAAGACCCTAAGCCTCCTTT\n+TGCTTGAAACCAAAGATATATACACAGGATCCTATTCTCACCAGGAAGGG\n+GGTCCACCCAGCAAAGAGTGGGCTGCATCTGGGATTCCCACCAAGGTCTT\n+CAGCCATCAACAAGAGTTGTCTTGTCCCCTCTTGACCCATCTCCCCCTCA\n+CTGAATGCCTCAATGTGACCAGGGGTGATTTCAGAGAGGGCAGAGGGGTA\n+GGCAGAGCCTTTGGATGACCAGAACAAGGTTCCCTCTGAGAATTCCAAGG\n+AGTTCCATGAAGACCACATCCACACACGCAGGAACTCCCAGCAACACAAG\n+CTGGAAGCACATGTTTATTTATTCTGCATTTTATTCTGGATGGATTTGAA\n+GCAAAGCACCAGCTTCTCCAGGCTCTTTGGGGTCAGCCAGGGCCAGGGGT\n+CTCCCTGGAGTGCAGTTTCCAATCCCATAGATGGGTCTGGCTGAGCTGAA\n+CCCATTTTGAGTGACTCGAGGGTTGGGTTCATCTGAGCAAGAGCTGGCAA\n+AGGTGGCTCTCCAGTTAGTTCTCTCGTAACTGGTTTCATTTCTACTGTGA\n+CTGATGTTACATCACAGTGTTTGCAATGGTGTTGCCCTGAGTGGATCTCC\n+AAGGACCAGGTTATTTTAAAAAGATTTGTTTTGTCAAGTGTCATATGTAG\n+GTGTCTGCACCCAGGGGTGGGGAATGTTTGGGCAGAAGGGAGAAGGATCT\n+AGAATGTGTTTTCTGAATAACATTTGTGTGGTGGGTTCTTTGGAAGGAGT\n+GAGATCATTTTCTTATCTTCTGCAATTGCTTAGGATGTTTTTCATGAAAA\n+TAGCTCTTTCAGGGGGGTTGTGAGGCCTGGCCAGGCACCCCCTGGAGAGA\n+AGTTTCTGGCCCTGGCTGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTG\n+CTTCAAATCCATCCAGAATAAAACGCAAAGGGCTGAAAGCCATTTGTTGG\n+GGCAGTGGTAAGCTCTGGCTTTCTCCGACTGCTAGGGAGTGGTCTTTCCT\n+ATCATGGAGTGACGGTCCCACACTGGTGACTGCGATCTTCAGAGCAGGGG\n+TCCTTGGTGTGACCCTCTGAATGGTCCAGGGTTGATCACACTCTGGGTTT\n+ATTACATGGCAGTGTTCCTATTTGGGGCTTGCATGCCAAATTGTAGTTCT\n+TGTCTGATTGGCTCACCCAAGCAAGGCCAAAATTACCAAAAATCTTGGGG\n+GGTTTTTACTCCAGTGGTGAAGAAAACTCCTTTAGCAGGTGGTCCTGAGA\n+CCTGACAAGCACTGCTAGGCGAGTGCCAGGACTCCCCAGGCCAGGCCACC\n+AGGATGGCCCTTCCCACTGGAGGTCACATTCAGGAAGATGAAAGAGGAGG\n+TTTGGGGTCTGCCACCATCCTGCTGCTGTGTTTTTGCTATCACACAGTGG\n+GTGGTGGATCTGTCCAAGGAAACTTGAATCAAAGCAGTTAACTTTAAGac\n+tgagcacctgcttcatgctcagccctgactggtgctataggctggagaag\n+ctcacccaataaacattaagattgaggcctgccctcagggatcttgcatt\n+cccagtggTCAAACCGCACTCACCCATGTGCCAAGGTGGGGTATTTACCA\n+CAGCAGCTGAACAGCCAAATGCATGGTGCAGTTGACAGCAGGTGGGAAAT\n+GGTATGAGCTGAGGGGGGCCGTGCCCAGGGGCCCACAGGGAACCCTGCTT\n+GCACTTTGTAACATGTTTACTTTTCagggcatcttagcttctattatagc\n+cacatccctttgaaacaagataactgagaatttaaaaataagaaaataTG\n+ACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAA\n+TAAAACGCAGAC
CCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAAT\n+CCATCCAGAATAAAACGCAGATGCTTTGCTTCAAATCCATCCAGAATAAA\n+ACGCAAATGACCCCAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATC\n+CATCCAGAATAAAACGCATGACCCCAAAGAGCCTGGAGAAGCTGATGCTT\n+TGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAA\n+GCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGATGCTTTGCT\n+TCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCTGGAGAAGCTG\n+ATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCCCAAAGAGCCT\n+GGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCAGACCC\n+CAAAGAGCCTGGAGAAGCTGATGCTTTGCTTCAAATCCATCCAGAATAAA\n+ACGCAGATGCTTTGCTTCAAATCCATCCAGAATAAAACGCA\n+\n+#---------------------------------------\n+#---------------------------------------\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needle_out.score
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needle_out.score Mon Jan 20 16:22:05 2025 +0000
b
@@ -0,0 +1,4 @@
+Sequence hg17 20196 (604.5)
+
+#---------------------------------------
+#---------------------------------------
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needleall_input1.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needleall_input1.fa Mon Jan 20 16:22:05 2025 +0000
b
@@ -0,0 +1,62 @@
+>Illumina_Genomici_DNA_Adapters1_1
+GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG
+>Illumina_Genomic_DNA_Adapters1_2
+ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Genomic_DNA_PCR_Primers1_1
+AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Genomic_DNA_PCR_Primers1_2
+CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT
+>Illumina_Genomic_DNA_sequencing_primer
+ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Paired_End_DNA_Adapters1_1
+GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
+>Illumina_Paired_End_DNA_Adapters1_2
+ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Paired_End_DNA_PCR_Primers1_1
+AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Paired_End_DNA_PCR_Primers1_2
+CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+>Illumina_Paired_End_DNA_sequencing_primer_1
+ACACTCTTTCCCTACACGACGCTCTTCCGATCT
+>Illumina_Paired_End_DNA_sequencing_primer_2
+CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
+>Illumina_DpnII_Gex_Adapters1_1
+GATCGTCGGACTGTAGAACTCTGAAC
+>Illumina_DpnII_Gex_Adapters1_2
+ACAGGTTCAGAGTTCTACAGTCCGAC
+>Illumina_DpnII_Gex_Adapters2_1
+CAAGCAGAAGACGGCATACGA
+>Illumina_DpnII_Gex_Adapters2_2
+TCGTATGCCGTCTTCTGCTTG
+>Illumina_DpnII_Gex_PCR_Primer_1
+CAAGCAGAAGACGGCATACGA
+>Illumina_DpnII_Gex_PCR_Primer_2
+AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+>Illumina_DpnII_Gex_sequencing_primer
+CGACAGGTTCAGAGTTCTACAGTCCGACGATC
+>Illumina_NlaIII_Gex_Adapters1_1
+TCGGACTGTAGAACTCTGAAC
+>Illumina_NlaIII_Gex_Adapters1_2
+ACAGGTTCAGAGTTCTACAGTCCGACATG
+>Illumina_NlaIII_Gex_Adapters2_1
+CAAGCAGAAGACGGCATACGANN
+>Illumina_NlaIII_Gex_Adapters2_2
+TCGTATGCCGTCTTCTGCTTG
+>Illumina_NlaIII_Gex_PCR_Primer_1
+CAAGCAGAAGACGGCATACGA
+>Illumina_NlaIII_Gex_PCR_Primer_2
+AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+>Illumina_NlaIII_Gex_sequencing_primer
+CCGACAGGTTCAGAGTTCTACAGTCCGACATG
+>Illumina_Small_RNA_RT_Primer
+CAAGCAGAAGACGGCATACGA
+>Illumina_Small_RNA_5p_Adapter
+GTTCAGAGTTCTACAGTCCGACGATC
+>Illumina_Small_RNA_3p_Adapter
+TCGTATGCCGTCTTCTGCTTGT
+>Illumina_Small_RNA_PCR_Primer_1
+CAAGCAGAAGACGGCATACGA
+>Illumina_Small_RNA_PCR_Primer_2
+AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA
+>Illumina_Small_RNA_sequencing_primer
+CGACAGGTTCAGAGTTCTACAGTCCGACGATC
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needleall_input2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needleall_input2.fq Mon Jan 20 16:22:05 2025 +0000
b
@@ -0,0 +1,100 @@
+@FC12044_91407_8_200_406_24
+GTTAGCTCCCACCTTAAGATGTTTA
++FC12044_91407_8_200_406_24
+SXXTXXXXXXXXXTTSUXSSXKTMQ
+@FC12044_91407_8_200_720_610
+CTCTGTGGCACCCCATCCCTCACTT
++FC12044_91407_8_200_720_610
+OXXXXXXXXXXXXXXXXXTSXQTXU
+@FC12044_91407_8_200_345_133
+GATTTTTTAACAATAAACGTACATA
++FC12044_91407_8_200_345_133
+OQTOOSFORTFFFIIOFFFFFFFFF
+@FC12044_91407_8_200_106_131
+GTTGCCCAGGCTCGTCTTGAACTCC
++FC12044_91407_8_200_106_131
+XXXXXXXXXXXXXXSXXXXISTXQS
+@FC12044_91407_8_200_916_471
+TGATTGAAGGTAGGGTAGCATACTG
++FC12044_91407_8_200_916_471
+XXXXXXXXXXXXXXXUXXUSXXTXW
+@FC12044_91407_8_200_57_85
+GCTCCAATAGCGCAGAGGAAACCTG
++FC12044_91407_8_200_57_85
+XFXMXSXXSXXXOSQROOSROFQIQ
+@FC12044_91407_8_200_10_437
+GCTGCTTGGGAGGCTGAGGCAGGAG
++FC12044_91407_8_200_10_437
+USXSXXXXXXUXXXSXQXXUQXXKS
+@FC12044_91407_8_200_154_436
+AGACCTTTGGATACAATGAACGACT
++FC12044_91407_8_200_154_436
+MKKMQTSRXMSQTOMRFOOIFFFFF
+@FC12044_91407_8_200_336_64
+AGGGAATTTTAGAGGAGGGCTGCCG
++FC12044_91407_8_200_336_64
+STQMOSXSXSQXQXXKXXXKFXFFK
+@FC12044_91407_8_200_620_233
+TCTCCATGTTGGTCAGGCTGGTCTC
++FC12044_91407_8_200_620_233
+XXXXXXXXXXXXXXXXXXXXXSXSW
+@FC12044_91407_8_200_902_349
+TGAACGTCGAGACGCAAGGCCCGCC
++FC12044_91407_8_200_902_349
+XMXSSXMXXSXQSXTSQXFKSKTOF
+@FC12044_91407_8_200_40_618
+CTGTCCCCACGGCGGGGGGGCCTGG
++FC12044_91407_8_200_40_618
+TXXXXSXXXXXXXXXXXXXRKFOXS
+@FC12044_91407_8_200_83_511
+GATGTACTCTTACACCCAGACTTTG
++FC12044_91407_8_200_83_511
+SOXXXXXUXXXXXXQKQKKROOQSU
+@FC12044_91407_8_200_76_246
+TCAAGGGTGGATCTTGGCTCCCAGT
++FC12044_91407_8_200_76_246
+XTXTUXXXXXRXXXTXXSUXSRFXQ
+@FC12044_91407_8_200_303_427
+TTGCGACAGAGTTTTGCTCTTGTCC
++FC12044_91407_8_200_303_427
+XXQROXXXXIXFQXXXOIQSSXUFF
+@FC12044_91407_8_200_31_299
+TCTGCTCCAGCTCCAAGACGCCGCC
++FC12044_91407_8_200_31_299
+XRXTSXXXRXXSXQQOXQTSQSXKQ
+@FC12044_91407_8_200_553_135
+TACGGAGCCGCGGGCGGGAAAGGCG
++FC12044_91407_8_200_553_135
+XSQQXXXXXXXXXXSXXMFFQXTKU
+@FC12044_91407_8_200_139_74
+CCTCCCAGGTTCAAGCGATTATCCT
++FC12044_91407_8_200_139_74
+RMXUSXTXXQXXQUXXXSQISISSO
+@FC12044_91407_8_200_108_33
+GTCATGGCGGCCCGCGCGGGGAGCG
++FC12044_91407_8_200_108_33
+OOOSSXXSXXOMKMOFMKFOKFFFF
+@FC12044_91407_8_200_980_965
+ACAGTGGGTTCTTAAAGAAGAGTCG
++FC12044_91407_8_200_980_965
+TOSSRXXXSSMSXMOMXIRXOXFFS
+@FC12044_91407_8_200_981_857
+AACGAGGGGCGCGACTTGACCTTGG
++FC12044_91407_8_200_981_857
+RXMSSXXXXSXQXQXFSXQFQKMXS
+@FC12044_91407_8_200_8_865
+TTTCCCACCCCAGGAAGCCTTGGAC
++FC12044_91407_8_200_8_865
+XXXFKOROMKOORMIMRIIKKORFF
+@FC12044_91407_8_200_292_484
+TCAGCCTCCGTGCCCAGCCCACTCC
++FC12044_91407_8_200_292_484
+XQXOSXXXXXUXXXXIXXXXQTOXF
+@FC12044_91407_8_200_675_16
+CTCGGGAGGCTGAGGCAGGGGGGTT
++FC12044_91407_8_200_675_16
+OXTXXXSXXQXXOXXKMXXMXOKQF
+@FC12044_91407_8_200_285_136
+CCAAATCTTGAATTGTAGCTCCCCT
++FC12044_91407_8_200_285_136
+OSXOQXXXXXSXXUXXTXXXXTRMS
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needleall_out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needleall_out.fasta Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,3232 @@\n+>Illumina_Genomici_DNA_Adapters1_1\n+GATCGGAAG--AGCTCGTATGCCGTCTT---CTGCTTG\n+>FC12044_91407_8_200_406_24\n+--------GTTAGCTC-----CCACCTTAAGATGTTTA\n+>Illumina_Genomic_DNA_Adapters1_2\n+ACACTCTT---TCCCTACACGACGCTCTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+-----GTTAGCTCCC-AC--------CTTAAGATGTTTA\n+>Illumina_Genomic_DNA_PCR_Primers1_1\n+AATGATACGGCGACCACC--GAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT\n+>FC12044_91407_8_200_406_24\n+---GTTA--GCTCCCACCTTAAGATGTTTA------------------------------\n+>Illumina_Genomic_DNA_PCR_Primers1_2\n+CAAGCAGAAGACGGCATACG--AGCT-----CTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+-------------------GTTAGCTCCCACCTTAAGATGTTTA\n+>Illumina_Genomic_DNA_sequencing_primer\n+ACACTCTT---TCCCTACACGACGCTCTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+-----GTTAGCTCCC-AC--------CTTAAGATGTTTA\n+>Illumina_Paired_End_DNA_Adapters1_1\n+-----------------GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCCCACCTTAAGAT----------GTTTA--------------\n+>Illumina_Paired_End_DNA_Adapters1_2\n+ACACTCTT---TCCCTACACGACGCTCTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+-----GTTAGCTCCC-AC--------CTTAAGATGTTTA\n+>Illumina_Paired_End_DNA_PCR_Primers1_1\n+AATGATACGGCGACCACC--GAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT\n+>FC12044_91407_8_200_406_24\n+---GTTA--GCTCCCACCTTAAGATGTTTA------------------------------\n+>Illumina_Paired_End_DNA_PCR_Primers1_2\n+CAAGCAGAAGACGGCATACGAGATCGGT---CTCGGCATTCCTGCTGAACCGCTCTTCCG\n+ATCT---\n+>FC12044_91407_8_200_406_24\n+--------------------------GTTAGCTC---------------CCAC-CTTAAG\n+ATGTTTA\n+>Illumina_Paired_End_DNA_sequencing_primer_1\n+ACACTCTT---TCCCTACACGACGCTCTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+-----GTTAGCTCCC-AC--------CTTAAGATGTTTA\n+>Illumina_Paired_End_DNA_sequencing_primer_2\n+CGGT---CTCGGCATTCCTGCTGAACCGCTCTTCCGATCT---\n+>FC12044_91407_8_200_406_24\n+--GTTAGCTC---------------CCAC-CTTAAGATGTTTA\n+>Illumina_DpnII_Gex_Adapters1_1\n+GATCGTCGGACTGTAGAACTCTGAAC------------\n+>FC12044_91407_8_
200_406_24\n+----GT-------TAG--CTCCCACCTTAAGATGTTTA\n+>Illumina_DpnII_Gex_Adapters1_2\n+ACAGGTTCAGAGTTCTACAG-TCCGAC-------------\n+>FC12044_91407_8_200_406_24\n+-----------GTT----AGCTCCCACCTTAAGATGTTTA\n+>Illumina_DpnII_Gex_Adapters2_1\n+--------CAAGCAGAAGACGGCATACGA\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCCCACCTTAAGA-TGTTTA---\n+>Illumina_DpnII_Gex_Adapters2_2\n+------TCGTATGCCGTCTT---CTGCTTG\n+>FC12044_91407_8_200_406_24\n+GTTAGCTC-----CCACCTTAAGATGTTTA\n+>Illumina_DpnII_Gex_PCR_Primer_1\n+--------CAAGCAGAAGACGGCATACGA\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCCCACCTTAAGA-TGTTTA---\n+>Illumina_DpnII_Gex_PCR_Primer_2\n+AATGATACGGCGACCACCGACAGGTTCAGA-GTTCTACAGTCCGA\n+>FC12044_91407_8_200_406_24\n+---GTTA--GCTCCCACC------TTAAGATGTT-TA--------\n+>Illumina_DpnII_Gex_sequencing_primer\n+CGACAGGTTCAGAGTTCTACAG-TCCGAC-----GATC----\n+>FC12044_91407_8_200_406_24\n+-------------GTT----AGCTCCCACCTTAAGATGTTTA\n+>Illumina_NlaIII_Gex_Adapters1_1\n+TCGGACTG-TAGAACTCTGAAC------------\n+>FC12044_91407_8_200_406_24\n+-------GTTAG--CTCCCACCTTAAGATGTTTA\n+>Illumina_NlaIII_Gex_Adapters1_2\n+ACAGGTTCAGAGTTCTACAG-TCCGAC------ATG----\n+>FC12044_91407_8_200_406_24\n+-----------GTT----AGCTCCCACCTTAAGATGTTTA\n+>Illumina_NlaIII_Gex_Adapters2_1\n+--------CAAGCAGAAGACGGCATACGANN\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCCCACCTTAAGA-TGTTTA-----\n+>Illumina_NlaIII_Gex_Adapters2_2\n+------TCGTATGCCGTCTT---CTGCTTG\n+>FC12044_91407_8_200_406_24\n+GTTAGCTC-----CCACCTTAAGATGTTTA\n+>Illumina_NlaIII_Gex_PCR_Primer_1\n+--------CAAGCAGAAGACGGCATACGA\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCCCACCTTAAGA-TGTTTA---\n+>Illumina_NlaIII_Gex_PCR_Primer_2\n+AATGATACGGCGACCACCGACAGGTTCAGA-GTTCTACAGTCCGA\n+>FC12044_91407_8_200_406_24\n+---GTTA--GCTCCCACC------TTAAGATGTT-TA--------\n+>Illumina_NlaIII_Gex_sequencing_primer\n+-------CCGACAGGTTCAGA-GTTCTACAGTCCGACATG\n+>FC12044_91407_8_200_406_24\n+GTTAGCTCC--CACCTTAAGATGTT-TA------------\n+>Illumina_Small_RNA_RT_Primer\n+--------CAAGCAGAAGACGGCATACGA\n+>FC12044_91407_8_200_
406_24\n+GTTAGCTCCCACCTTAAGA-TGTTTA---\n+>Illumina_Small_RNA_5p_Adapter\n+GTTCAGAGTTCTACAG-TCCGAC-----GATC---'..b'd_End_DNA_Adapters1_1\n+GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG--------------------\n+>FC12044_91407_8_200_285_136\n+---------------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_Paired_End_DNA_Adapters1_2\n+--ACACTCTT----------TCCCTACACGACGCTCTTCCGATCT\n+>FC12044_91407_8_200_285_136\n+CCA-AATCTTGAATTGTAGCTCCC---------CT----------\n+>Illumina_Paired_End_DNA_PCR_Primers1_1\n+AATGATACGGCGACCACCGAGATCTACA-------CTCTTTCCCTACACGACGCTCTTCC\n+GATCT\n+>FC12044_91407_8_200_285_136\n+-----------------CCAAATCTTGAATTGTAGCTC---CCCT---------------\n+-----\n+>Illumina_Paired_End_DNA_PCR_Primers1_2\n+CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATC\n+T\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATC-----TTGAATTGTAGCT---CCCCT---------\n+-\n+>Illumina_Paired_End_DNA_sequencing_primer_1\n+--ACACTCTT----------TCCCTACACGACGCTCTTCCGATCT\n+>FC12044_91407_8_200_285_136\n+CCA-AATCTTGAATTGTAGCTCCC---------CT----------\n+>Illumina_Paired_End_DNA_sequencing_primer_2\n+CGGTCTCGGCATTCC---TGCTGAA--------CCGCTCTTCCGATCT\n+>FC12044_91407_8_200_285_136\n+-------------CCAAATCTTGAATTGTAGCTCCCCT----------\n+>Illumina_DpnII_Gex_Adapters1_1\n+---GATCGTCGGACTGTAGAACT---CTGAAC\n+>FC12044_91407_8_200_285_136\n+CCAAATC-TTGAATTGTAG--CTCCCCT----\n+>Illumina_DpnII_Gex_Adapters1_2\n+--ACAGGTTCAGAGTTCTACAG-TCCGAC--\n+>FC12044_91407_8_200_285_136\n+CCAAATCTT--GAATTGT--AGCTCC--CCT\n+>Illumina_DpnII_Gex_Adapters2_1\n+CAAGCAGAAGACGGCATACGA----------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_DpnII_Gex_Adapters2_2\n+------------TCGTA--TGCCGTCTTCTGCTTG\n+>FC12044_91407_8_200_285_136\n+CCAAATCTTGAATTGTAGCTCCCCT----------\n+>Illumina_DpnII_Gex_PCR_Primer_1\n+CAAGCAGAAGACGGCATACGA----------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_DpnII_Gex_PCR_Primer_2\n+AATGATACGGCGACCACCGA
CAGGTTCAGAGTTCTACAG-TCCGA-\n+>FC12044_91407_8_200_285_136\n+-----------------CCAAATCTT--GAATTGT--AGCTCCCCT\n+>Illumina_DpnII_Gex_sequencing_primer\n+CGACAGGTTCAGAGTTCTACAGTCCGACGATC----\n+>FC12044_91407_8_200_285_136\n+CCAAATCTT--GAATTGTA---------GCTCCCCT\n+>Illumina_NlaIII_Gex_Adapters1_1\n+-------TCGGACTGTAGAACT---CTGAAC\n+>FC12044_91407_8_200_285_136\n+CCAAATCTTGAATTGTAG--CTCCCCT----\n+>Illumina_NlaIII_Gex_Adapters1_2\n+--ACAGGTTCAGAGTTCTACAG-TCCGACATG\n+>FC12044_91407_8_200_285_136\n+CCAAATCTT--GAATTGT--AGCTCC--CCT-\n+>Illumina_NlaIII_Gex_Adapters2_1\n+CAAGCAGAAGACGGCATACGANN--------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_NlaIII_Gex_Adapters2_2\n+------------TCGTA--TGCCGTCTTCTGCTTG\n+>FC12044_91407_8_200_285_136\n+CCAAATCTTGAATTGTAGCTCCCCT----------\n+>Illumina_NlaIII_Gex_PCR_Primer_1\n+CAAGCAGAAGACGGCATACGA----------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_NlaIII_Gex_PCR_Primer_2\n+AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAG-TCCGA-\n+>FC12044_91407_8_200_285_136\n+-----------------CCAAATCTT--GAATTGT--AGCTCCCCT\n+>Illumina_NlaIII_Gex_sequencing_primer\n+CCGACAGGTTCAGAGTTCTACAG-TCCGACATG\n+>FC12044_91407_8_200_285_136\n+-CCAAATCTT--GAATTGT--AGCTCC--CCT-\n+>Illumina_Small_RNA_RT_Primer\n+CAAGCAGAAGACGGCATACGA----------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_Small_RNA_5p_Adapter\n+GTTCAGAGTTCTACAGTCCGACGATC------------------\n+>FC12044_91407_8_200_285_136\n+-----------------CCAA--ATCTTGAATTGTAGCTCCCCT\n+>Illumina_Small_RNA_3p_Adapter\n+------------TCGTA--TGCCGTCTTCTGCTTGT\n+>FC12044_91407_8_200_285_136\n+CCAAATCTTGAATTGTAGCTCCCCT-----------\n+>Illumina_Small_RNA_PCR_Primer_1\n+CAAGCAGAAGACGGCATACGA----------------------\n+>FC12044_91407_8_200_285_136\n+------------------CCAAATCTTGAATTGTAGCTCCCCT\n+>Illumina_Small_RNA_PCR_Primer_2\n+AATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAG-TCCGA-\n+>FC1204
4_91407_8_200_285_136\n+-----------------CCAAATCTT--GAATTGT--AGCTCCCCT\n+>Illumina_Small_RNA_sequencing_primer\n+CGACAGGTTCAGAGTTCTACAGTCCGACGATC----\n+>FC12044_91407_8_200_285_136\n+CCAAATCTT--GAATTGTA---------GCTCCCCT\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needleall_out.pair
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needleall_out.pair Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,18267 @@\n+########################################\n+# Program: needleall\n+# Rundate: Mon 16 Dec 2024 17:13:24\n+# Commandline: needleall\n+#    -asequence /tmp/saskia/tmpet25av5g/files/e/e/8/dataset_ee891bb1-6c31-453a-8e30-69544c761887.dat\n+#    -bsequence /tmp/saskia/tmpet25av5g/files/0/7/0/dataset_07021fba-c6f2-478f-8665-0e6832dba409.dat\n+#    -outfile /tmp/saskia/tmpet25av5g/job_working_directory/000/9/outputs/dataset_b9c62224-ff77-442f-a21f-1e30d4ffb6fc.dat\n+#    -gapopen 10.0\n+#    -gapextend 0.5\n+#    -brief yes\n+#    -aformat3 pair\n+#    -auto\n+#    -datafile EPAM30\n+#    -endopen 13.37\n+#    -endextend 2.5\n+#    -minscore 1.0\n+# Align_format: pair\n+# Report_file: /tmp/saskia/tmpet25av5g/job_working_directory/000/9/outputs/dataset_b9c62224-ff77-442f-a21f-1e30d4ffb6fc.dat\n+########################################\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Genomici_DNA_Adapters1_1\n+# 2: FC12044_91407_8_200_406_24\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 37\n+# Identity:      15/37 (40.5%)\n+# Similarity:    15/37 (40.5%)\n+# Gaps:          16/37 (43.2%)\n+# Score: 61.5\n+# \n+#\n+#=======================================\n+\n+Illumina_Geno      1 GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTG----     33\n+                     |.|      |||||     ||..|||..| .||    \n+FC12044_91407      1 GTT------AGCTC-----CCACCTTAAG-ATGTTTA     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Genomic_DNA_Adapters1_2\n+# 2: FC12044_91407_8_200_406_24\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 40\n+# Identity:      14/40 (35.0%)\n+# Similarity:    14/40 (35.0%)\n+# Gaps:          22/40 (55.0%)\n+# Score: 54.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Geno      1 ----ACACTCTTTCCCTACACGACGCTCTTCCGATCT---     33\n+                         .|      |||| ||        |||..|||.| 
  \n+FC12044_91407      1 GTTAGC------TCCC-AC--------CTTAAGATGTTTA     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Genomic_DNA_PCR_Primers1_1\n+# 2: FC12044_91407_8_200_406_24\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 60\n+# Identity:      17/60 (28.3%)\n+# Similarity:    17/60 (28.3%)\n+# Gaps:          37/60 (61.7%)\n+# Score: 68.5\n+# \n+#\n+#=======================================\n+\n+Illumina_Geno      1 AATGATACGGCGACCACC--GAGATCTACACTCTTTCCCTACACGACGCT     48\n+                        |.||  ||..|||||  .||||.|      ||.              \n+FC12044_91407      1 ---GTTA--GCTCCCACCTTAAGATGT------TTA--------------     25\n+\n+Illumina_Geno     49 CTTCCGATCT     58\n+                               \n+FC12044_91407     26 ----------     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Genomic_DNA_PCR_Primers1_2\n+# 2: FC12044_91407_8_200_406_24\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 47\n+# Identity:      11/47 (23.4%)\n+# Similarity:    11/47 (23.4%)\n+# Gaps:          35/47 (74.5%)\n+# Score: 49.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Geno      1 CAAGCAGAAGACGGCATACG--AGCTCTTCCGATCT-----------     34\n+                                        |  |||||  || |.||           \n+FC12044_91407      1 -------------------GTTAGCTC--CC-ACCTTAAGATGTTTA     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Genomic_DNA_sequencing_primer\n+# 2: FC12044_91407_8_200_406_24\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 40\n+# Identity:      14/40 (35.0%)\n+# Similarity:    14/40 (35.0%)\n+# Gaps:          22/40 (55.0%)\n+# Score: 54.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Geno      1 ----ACACTCTTTCCCTACACGACGCTCTTCCGATCT---     33\n+                         .|      |||| || 
       |||..|||.|   \n+FC12044_91407      1 GTTAGC------TCCC-AC--------CTTAAGATGTTTA     25\n+\n+\n+#=======================================\n'..b'_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 34\n+# Identity:      17/34 (50.0%)\n+# Similarity:    17/34 (50.0%)\n+# Gaps:          11/34 (32.4%)\n+# Score: 62.5\n+# \n+#\n+#=======================================\n+\n+Illumina_NlaI      1 CCGACAGGTTC-AGAGTTCTA-CAGTCCGACATG     32\n+                     ||.|    .|| .||.||.|| |  |||  |.| \n+FC12044_91407      1 CCAA----ATCTTGAATTGTAGC--TCC--CCT-     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_RT_Primer\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 35\n+# Identity:       8/35 (22.9%)\n+# Similarity:     8/35 (22.9%)\n+# Gaps:          24/35 (68.6%)\n+# Score: 23.5\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 CAAGCAGAAGACGGCATACGA--------------     21\n+                     |   ||.|.     
|.|  ||              \n+FC12044_91407      1 C---CAAAT-----CTT--GAATTGTAGCTCCCCT     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_5p_Adapter\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 32\n+# Identity:      14/32 (43.8%)\n+# Similarity:    14/32 (43.8%)\n+# Gaps:          13/32 (40.6%)\n+# Score: 46.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 ---GTTC-AGAGTTCTA-CAGTCCGACGATC-     26\n+                        ..|| .||.||.|| |  |||  |   | \n+FC12044_91407      1 CCAAATCTTGAATTGTAGC--TCC--C---CT     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_3p_Adapter\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 34\n+# Identity:      11/34 (32.4%)\n+# Similarity:    11/34 (32.4%)\n+# Gaps:          21/34 (61.8%)\n+# Score: 45.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 TCGTATGCC--GTCTTCTG-CTTGT---------     22\n+                            ||  .|||  || .||||         \n+FC12044_91407      1 -------CCAAATCT--TGAATTGTAGCTCCCCT     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_PCR_Primer_1\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 35\n+# Identity:       8/35 (22.9%)\n+# Similarity:     8/35 (22.9%)\n+# Gaps:          24/35 (68.6%)\n+# Score: 23.5\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 CAAGCAGAAGACGGCATACGA--------------     21\n+                     |   ||.|.     
|.|  ||              \n+FC12044_91407      1 C---CAAAT-----CTT--GAATTGTAGCTCCCCT     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_PCR_Primer_2\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 49\n+# Identity:      14/49 (28.6%)\n+# Similarity:    14/49 (28.6%)\n+# Gaps:          29/49 (59.2%)\n+# Score: 54.0\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 AATGATACGGCGACCACCGACAGGTTC-AGAGTT----CTACAGTCCGA     44\n+                                     ||.|    .|| .||.||    ||.|   ||. \n+FC12044_91407      1 ----------------CCAA----ATCTTGAATTGTAGCTCC---CCT-     25\n+\n+\n+#=======================================\n+#\n+# Aligned_sequences: 2\n+# 1: Illumina_Small_RNA_sequencing_primer\n+# 2: FC12044_91407_8_200_285_136\n+# Matrix: EPAM30\n+# Gap_penalty: 10.0\n+# Extend_penalty: 0.5\n+#\n+# Length: 35\n+# Identity:      17/35 (48.6%)\n+# Similarity:    17/35 (48.6%)\n+# Gaps:          13/35 (37.1%)\n+# Score: 51.5\n+# \n+#\n+#=======================================\n+\n+Illumina_Smal      1 CGACAGGTTC-AGAGTTCTA-CAGTCCGACGATC-     32\n+                     |  || ..|| .||.||.|| |  |||  |   | \n+FC12044_91407      1 C--CA-AATCTTGAATTGTAGC--TCC--C---CT     25\n+\n+\n+#---------------------------------------\n+#---------------------------------------\n'
b
diff -r 000000000000 -r 2f0dc62d0a19 test-data/emboss_needleall_out.score
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/emboss_needleall_out.score Mon Jan 20 16:22:05 2025 +0000
b
b'@@ -0,0 +1,777 @@\n+Illumina_Genomici_DNA_Adapters1_1 FC12044_91407_8_200_406_24 38 (21.5)\n+Illumina_Genomic_DNA_Adapters1_2 FC12044_91407_8_200_406_24 39 (24.5)\n+Illumina_Genomic_DNA_PCR_Primers1_1 FC12044_91407_8_200_406_24 60 (31.0)\n+Illumina_Genomic_DNA_PCR_Primers1_2 FC12044_91407_8_200_406_24 44 (25.5)\n+Illumina_Genomic_DNA_sequencing_primer FC12044_91407_8_200_406_24 39 (24.5)\n+Illumina_Paired_End_DNA_Adapters1_1 FC12044_91407_8_200_406_24 49 (16.5)\n+Illumina_Paired_End_DNA_Adapters1_2 FC12044_91407_8_200_406_24 39 (24.5)\n+Illumina_Paired_End_DNA_PCR_Primers1_1 FC12044_91407_8_200_406_24 60 (31.0)\n+Illumina_Paired_End_DNA_PCR_Primers1_2 FC12044_91407_8_200_406_24 67 (21.0)\n+Illumina_Paired_End_DNA_sequencing_primer_1 FC12044_91407_8_200_406_24 39 (24.5)\n+Illumina_Paired_End_DNA_sequencing_primer_2 FC12044_91407_8_200_406_24 43 (21.0)\n+Illumina_DpnII_Gex_Adapters1_1 FC12044_91407_8_200_406_24 38 (14.5)\n+Illumina_DpnII_Gex_Adapters1_2 FC12044_91407_8_200_406_24 40 (24.5)\n+Illumina_DpnII_Gex_Adapters2_1 FC12044_91407_8_200_406_24 29 (12.0)\n+Illumina_DpnII_Gex_Adapters2_2 FC12044_91407_8_200_406_24 30 (12.0)\n+Illumina_DpnII_Gex_PCR_Primer_1 FC12044_91407_8_200_406_24 29 (12.0)\n+Illumina_DpnII_Gex_PCR_Primer_2 FC12044_91407_8_200_406_24 45 (41.0)\n+Illumina_DpnII_Gex_sequencing_primer FC12044_91407_8_200_406_24 42 (23.5)\n+Illumina_NlaIII_Gex_Adapters1_1 FC12044_91407_8_200_406_24 34 (12.5)\n+Illumina_NlaIII_Gex_Adapters1_2 FC12044_91407_8_200_406_24 40 (27.0)\n+Illumina_NlaIII_Gex_Adapters2_1 FC12044_91407_8_200_406_24 31 (12.0)\n+Illumina_NlaIII_Gex_Adapters2_2 FC12044_91407_8_200_406_24 30 (12.0)\n+Illumina_NlaIII_Gex_PCR_Primer_1 FC12044_91407_8_200_406_24 29 (12.0)\n+Illumina_NlaIII_Gex_PCR_Primer_2 FC12044_91407_8_200_406_24 45 (41.0)\n+Illumina_NlaIII_Gex_sequencing_primer FC12044_91407_8_200_406_24 40 (27.5)\n+Illumina_Small_RNA_RT_Primer FC12044_91407_8_200_406_24 29 (12.0)\n+Illumina_Small_RNA_5p_Adapter FC12044_91407_8_200_406_24 36 
(23.5)\n+Illumina_Small_RNA_3p_Adapter FC12044_91407_8_200_406_24 31 (13.0)\n+Illumina_Small_RNA_PCR_Primer_1 FC12044_91407_8_200_406_24 29 (12.0)\n+Illumina_Small_RNA_PCR_Primer_2 FC12044_91407_8_200_406_24 45 (41.0)\n+Illumina_Small_RNA_sequencing_primer FC12044_91407_8_200_406_24 42 (23.5)\n+Illumina_Genomici_DNA_Adapters1_1 FC12044_91407_8_200_720_610 44 (17.5)\n+Illumina_Genomic_DNA_Adapters1_2 FC12044_91407_8_200_720_610 40 (31.5)\n+Illumina_Genomic_DNA_PCR_Primers1_1 FC12044_91407_8_200_720_610 61 (31.5)\n+Illumina_Genomic_DNA_PCR_Primers1_2 FC12044_91407_8_200_720_610 47 (20.5)\n+Illumina_Genomic_DNA_sequencing_primer FC12044_91407_8_200_720_610 40 (31.5)\n+Illumina_Paired_End_DNA_Adapters1_2 FC12044_91407_8_200_720_610 40 (31.5)\n+Illumina_Paired_End_DNA_PCR_Primers1_1 FC12044_91407_8_200_720_610 61 (31.5)\n+Illumina_Paired_End_DNA_PCR_Primers1_2 FC12044_91407_8_200_720_610 64 (33.5)\n+Illumina_Paired_End_DNA_sequencing_primer_1 FC12044_91407_8_200_720_610 40 (31.5)\n+Illumina_Paired_End_DNA_sequencing_primer_2 FC12044_91407_8_200_720_610 40 (33.5)\n+Illumina_DpnII_Gex_Adapters1_1 FC12044_91407_8_200_720_610 43 (20.0)\n+Illumina_DpnII_Gex_Adapters1_2 FC12044_91407_8_200_720_610 40 (9.0)\n+Illumina_DpnII_Gex_Adapters2_1 FC12044_91407_8_200_720_610 36 (11.0)\n+Illumina_DpnII_Gex_Adapters2_2 FC12044_91407_8_200_720_610 39 (15.5)\n+Illumina_DpnII_Gex_PCR_Primer_1 FC12044_91407_8_200_720_610 36 (11.0)\n+Illumina_DpnII_Gex_PCR_Primer_2 FC12044_91407_8_200_720_610 59 (10.0)\n+Illumina_DpnII_Gex_sequencing_primer FC12044_91407_8_200_720_610 44 (15.0)\n+Illumina_NlaIII_Gex_Adapters1_1 FC12044_91407_8_200_720_610 38 (20.0)\n+Illumina_NlaIII_Gex_Adapters1_2 FC12044_91407_8_200_720_610 42 (9.5)\n+Illumina_NlaIII_Gex_Adapters2_1 FC12044_91407_8_200_720_610 36 (7.0)\n+Illumina_NlaIII_Gex_Adapters2_2 FC12044_91407_8_200_720_610 39 (15.5)\n+Illumina_NlaIII_Gex_PCR_Primer_1 FC12044_91407_8_200_720_610 36 (11.0)\n+Illumina_NlaIII_Gex_PCR_Primer_2 FC12044_91407_8_200_720_610 
59 (10.0)\n+Illumina_NlaIII_Gex_sequencing_primer FC12044_'..b'\n+Illumina_Paired_End_DNA_PCR_Primers1_2 FC12044_91407_8_200_675_16 67 (26.0)\n+Illumina_Paired_End_DNA_sequencing_primer_1 FC12044_91407_8_200_675_16 56 (10.0)\n+Illumina_Paired_End_DNA_sequencing_primer_2 FC12044_91407_8_200_675_16 43 (26.0)\n+Illumina_DpnII_Gex_Adapters1_1 FC12044_91407_8_200_675_16 43 (13.0)\n+Illumina_DpnII_Gex_Adapters1_2 FC12044_91407_8_200_675_16 40 (17.0)\n+Illumina_DpnII_Gex_Adapters2_1 FC12044_91407_8_200_675_16 33 (13.5)\n+Illumina_DpnII_Gex_Adapters2_2 FC12044_91407_8_200_675_16 42 (11.0)\n+Illumina_DpnII_Gex_PCR_Primer_1 FC12044_91407_8_200_675_16 33 (13.5)\n+Illumina_DpnII_Gex_PCR_Primer_2 FC12044_91407_8_200_675_16 51 (17.5)\n+Illumina_DpnII_Gex_sequencing_primer FC12044_91407_8_200_675_16 41 (22.0)\n+Illumina_NlaIII_Gex_Adapters1_1 FC12044_91407_8_200_675_16 38 (13.0)\n+Illumina_NlaIII_Gex_Adapters1_2 FC12044_91407_8_200_675_16 43 (17.0)\n+Illumina_NlaIII_Gex_Adapters2_1 FC12044_91407_8_200_675_16 35 (13.5)\n+Illumina_NlaIII_Gex_Adapters2_2 FC12044_91407_8_200_675_16 42 (11.0)\n+Illumina_NlaIII_Gex_PCR_Primer_1 FC12044_91407_8_200_675_16 33 (13.5)\n+Illumina_NlaIII_Gex_PCR_Primer_2 FC12044_91407_8_200_675_16 51 (17.5)\n+Illumina_NlaIII_Gex_sequencing_primer FC12044_91407_8_200_675_16 43 (21.5)\n+Illumina_Small_RNA_RT_Primer FC12044_91407_8_200_675_16 33 (13.5)\n+Illumina_Small_RNA_5p_Adapter FC12044_91407_8_200_675_16 48 (15.0)\n+Illumina_Small_RNA_3p_Adapter FC12044_91407_8_200_675_16 42 (7.0)\n+Illumina_Small_RNA_PCR_Primer_1 FC12044_91407_8_200_675_16 33 (13.5)\n+Illumina_Small_RNA_PCR_Primer_2 FC12044_91407_8_200_675_16 51 (17.5)\n+Illumina_Small_RNA_sequencing_primer FC12044_91407_8_200_675_16 41 (22.0)\n+Illumina_Genomici_DNA_Adapters1_1 FC12044_91407_8_200_285_136 40 (21.0)\n+Illumina_Genomic_DNA_Adapters1_2 FC12044_91407_8_200_285_136 45 (17.5)\n+Illumina_Genomic_DNA_PCR_Primers1_1 FC12044_91407_8_200_285_136 65 (30.0)\n+Illumina_Genomic_DNA_PCR_Primers1_2 
FC12044_91407_8_200_285_136 39 (16.5)\n+Illumina_Genomic_DNA_sequencing_primer FC12044_91407_8_200_285_136 45 (17.5)\n+Illumina_Paired_End_DNA_Adapters1_1 FC12044_91407_8_200_285_136 52 (7.0)\n+Illumina_Paired_End_DNA_Adapters1_2 FC12044_91407_8_200_285_136 45 (17.5)\n+Illumina_Paired_End_DNA_PCR_Primers1_1 FC12044_91407_8_200_285_136 65 (30.0)\n+Illumina_Paired_End_DNA_PCR_Primers1_2 FC12044_91407_8_200_285_136 61 (21.0)\n+Illumina_Paired_End_DNA_sequencing_primer_1 FC12044_91407_8_200_285_136 45 (17.5)\n+Illumina_Paired_End_DNA_sequencing_primer_2 FC12044_91407_8_200_285_136 48 (18.5)\n+Illumina_DpnII_Gex_Adapters1_1 FC12044_91407_8_200_285_136 32 (27.5)\n+Illumina_DpnII_Gex_Adapters1_2 FC12044_91407_8_200_285_136 31 (13.5)\n+Illumina_DpnII_Gex_Adapters2_1 FC12044_91407_8_200_285_136 43 (6.0)\n+Illumina_DpnII_Gex_Adapters2_2 FC12044_91407_8_200_285_136 35 (17.5)\n+Illumina_DpnII_Gex_PCR_Primer_1 FC12044_91407_8_200_285_136 43 (6.0)\n+Illumina_DpnII_Gex_PCR_Primer_2 FC12044_91407_8_200_285_136 46 (12.0)\n+Illumina_DpnII_Gex_sequencing_primer FC12044_91407_8_200_285_136 36 (17.5)\n+Illumina_NlaIII_Gex_Adapters1_1 FC12044_91407_8_200_285_136 31 (26.5)\n+Illumina_NlaIII_Gex_Adapters1_2 FC12044_91407_8_200_285_136 32 (14.5)\n+Illumina_NlaIII_Gex_Adapters2_1 FC12044_91407_8_200_285_136 43 (2.0)\n+Illumina_NlaIII_Gex_Adapters2_2 FC12044_91407_8_200_285_136 35 (17.5)\n+Illumina_NlaIII_Gex_PCR_Primer_1 FC12044_91407_8_200_285_136 43 (6.0)\n+Illumina_NlaIII_Gex_PCR_Primer_2 FC12044_91407_8_200_285_136 46 (12.0)\n+Illumina_NlaIII_Gex_sequencing_primer FC12044_91407_8_200_285_136 33 (15.5)\n+Illumina_Small_RNA_RT_Primer FC12044_91407_8_200_285_136 43 (6.0)\n+Illumina_Small_RNA_5p_Adapter FC12044_91407_8_200_285_136 44 (15.5)\n+Illumina_Small_RNA_3p_Adapter FC12044_91407_8_200_285_136 36 (17.5)\n+Illumina_Small_RNA_PCR_Primer_1 FC12044_91407_8_200_285_136 43 (6.0)\n+Illumina_Small_RNA_PCR_Primer_2 FC12044_91407_8_200_285_136 46 (12.0)\n+Illumina_Small_RNA_sequencing_primer 
FC12044_91407_8_200_285_136 36 (17.5)\n+\n+#---------------------------------------\n+#---------------------------------------\n'