Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6 |
added:
macros.xml necat.xml test-data/test1.fa test-data/test1_head.fastq test-data/test1_tail.fasta |
b |
diff -r 000000000000 -r 6ee7eb5821f0 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Nov 25 14:24:27 2022 +0000 |
[ |
b'@@ -0,0 +1,250 @@\n+<?xml version="1.0"?>\r\n+\r\n+<macros>\r\n+ <token name="@TOOL_VERSION@">0.0.1_update20200803</token>\r\n+ <token name="@VERSION_SUFFIX@">0</token>\r\n+ <xml name="job_conf">\r\n+ <configfile name="job_configfile">\r\n+ <![CDATA[\r\n+ PROJECT=project\r\n+ ONT_READ_LIST=read_list.txt\r\n+ GENOME_SIZE=${genome_size}\r\n+ THREADS=\\${GALAXY_SLOTS:-4}\r\n+ \r\n+ MIN_READ_LENGTH=${min_read_length}\r\n+ PREP_OUTPUT_COVERAGE=${correction_coverage}\r\n+ \r\n+ OVLP_FAST_OPTIONS=-k ${adv.ovf.k} -z ${adv.ovf.z} -q ${adv.ovf.q} -b ${adv.ovf.b} -n ${adv.ovf.n} -a ${adv.ovf.a} -d ${adv.ovf.d} -e ${adv.ovf.e} -m ${adv.ovf.m} -j 0 -u 1\r\n+\r\n+ OVLP_SENSITIVE_OPTIONS=-k ${adv.ovs.k} -z ${adv.ovs.z} -q ${adv.ovs.q} -b ${adv.ovs.b} -n ${adv.ovs.n} -a ${adv.ovs.a} -d ${adv.ovs.d} -e ${adv.ovs.e} -m ${adv.ovs.m} -j 0 -u 1\r\n+\r\n+ CNS_FAST_OPTIONS=-a ${adv.cnf.a} -x ${adv.cnf.x} -y ${adv.cnf.y} -l ${adv.cnf.l} -e ${adv.cnf.e} -p ${adv.cnf.p} -r ${adv.cnf.r} -u ${adv.cnf.u} \r\n+ \r\n+ CNS_SENSITIVE_OPTIONS=-a ${adv.cns.a} -x ${adv.cns.x} -y ${adv.cns.y} -l ${adv.cns.l} -e ${adv.cns.e} -p ${adv.cns.p} -r ${adv.cns.r} -u ${adv.cns.u} \r\n+\r\n+ TRIM_OVLP_OPTIONS=-n ${adv.tov.n} -a ${adv.tov.a} -z ${adv.tov.z} -b ${adv.tov.b} -e ${adv.tov.e} -j 1 -u 1\r\n+ \r\n+ ASM_OVLP_OPTIONS=-n ${adv.aov.n} -a ${adv.aov.a} -z ${adv.aov.z} -b ${adv.aov.b} -e ${adv.aov.e} -j 1 -u 0\r\n+\r\n+ NUM_ITER=2\r\n+ \r\n+ #if $assembly.should_assemble == "yes":\r\n+ CNS_OUTPUT_COVERAGE=$assembly.assembly_coverage\r\n+ POLISH_CONTIGS=$assembly.polish_contigs\r\n+ #else:\r\n+ CNS_OUTPUT_COVERAGE=30\r\n+ POLISH_CONTIGS=false\r\n+ #end if \r\n+\r\n+ CLEANUP=1\r\n+ USE_GRID=false \r\n+ GRID_NODE=0 \r\n+ GRID_OPTIONS=\r\n+ SMALL_MEMORY=0 \r\n+ FSA_OL_FILTER_OPTIONS=\r\n+ --min_length={adv.fol.min_length} \r\n+ --max_length={adv.fol.max_length} \r\n+ --min_identity={adv.fol.min_identity}\r\n+ --min_aligned_length={adv.fol.min_aligned_length}\r\n+ --max_overhang={adv.fol.max_overhang}\r\n+ --min_coverage={adv.fol.min_coverage}\r\n+ --max_coverage={adv.fol.max_coverage}\r\n+ --max_diff_coverage={adv.fol.max_diff_coverage}\r\n+ --coverage_discard={adv.fol.coverage_discard}\r\n+ --bestn={adv.fol.bestn}\r\n+ --genome_size={adv.fol.genome_size}\r\n+ --coverage={adv.fol.coverage}\r\n+ --thread_size=\\${GALAXY_SLOTS:-4}\r\n+ --identity_global_deviation1={adv.fol.identity_global_deviation1}\r\n+ --identity_global_deviation2={adv.fol.identity_global_deviation2}\r\n+ --overhang_global_deviation1={adv.fol.overhang_global_deviation1}\r\n+ --overhang_global_deviation2={adv.fol.overhang_global_deviation2}\r\n+ --identity_local_deviation1={adv.fol.identity_local_deviation1}\r\n+ --identity_local_deviation2={adv.fol.identity_local_deviation2}\r\n+ --overhang_local_deviation1={adv.fol.overhang_local_deviation1}\r\n+ --overhang_local_deviation2={adv.fol.overhang_local_deviation2}\r\n+ --identity_local_condition={adv.fol.identity_local_condition}\r\n+ --local_low_coverage={adv.fol.local_low_coverage}\r\n+ --overlap_file_type=m4\r\n+\r\n+ FSA_ASSEMBLE_OPTIONS= \r\n+ --min_length={adv.fa.min_length}\r\n+ --min_identity={adv.fa.min_identity}\r\n+ --min_aligned_length={adv.fa.min_aligned_length}\r\n+ --min_contig_length={adv.fa.min_contig_length}\r\n+ --max_spur_length={adv.fa.max_spur_length}\r\n+ --select_branch={adv.fa.select_branch}\r\n+ --overlap_file_type=m4\r\n+ --thread_size=\\${GALAXY_SLO'..b'<param argument="--local_low_coverage" type="integer" min="0" value="25" label="local low coverage" help="If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25" />\r\n+ </section>\r\n+ </xml>\r\n+\r\n+\r\n+ <xml name="contig_assembly">\r\n+ <section name="fa" title="Contig Assembly Options (for fsa_assemble subprogram)" expanded="false" help="Passed to fsa_assemble subprogram. Constructs contigs from filtered overlaps and corrected reads. ">\r\n+ <param argument="--min_length" type="integer" min="0" value="0" label="min read length" help="minimum length of reads" />\r\n+ <param argument="--min_identity" type="float" min="0" max="100" value="0" label="min overlap identity" help="minimum identity of overlaps" />\r\n+ <param argument="--min_contig_length" type="integer" min="1" value="500" label="min contig length" help="minimum length of contigs" />\r\n+ <param argument="--max_spur_length" type="integer" min="1" value="50000" label="max spur length" help="branches less the threshod are treated as spurs" />\r\n+ <param argument="--select_branch" type="boolean" truevalue="best" falsevalue="no" label="select branch" help="select the most probable branch. default is no." />\r\n+ </section>\r\n+ </xml>\r\n+\r\n+\r\n+\r\n+ <xml name="contig_bridging">\r\n+ <section name="fcb" title="Contig Bridging Options (for fsa_ctg_bridge subprogram)" expanded="false" help="Passed to fsa_ctg_bridge subprogram. Bridges contigs using long reads ">\r\n+ <param argument="--read_min_length" type="integer" min="1" value="5000" label="read min length" help="minimum rawread length" />\r\n+ <param argument="--ctg_min_length" type="integer" min="1" value="500" label="contig min length" help="minimum contig length" />\r\n+\r\n+ <param argument="--ctg2ctg_min_identity" type="float" min="1" max="100" value="95" label="ctg2ctg_min_identity" help="minimum identity of overlaps between contigs" />\r\n+ <param argument="--ctg2ctg_max_overhang" type="integer" min="1" value="100" label="contig-contig max overhang" help="maximum overhang of overlaps between contigs" />\r\n+ <param argument="--ctg2ctg_min_aligned_length" type="integer" min="1" value="2000" label="contig-contig min aligned length" help="minimum aligned length of overlaps between contigs" />\r\n+\r\n+ <param argument="--read2ctg_min_identity" type="float" min="1" max="100" value="80" label="read-contig min identity" help="minimum identity of overlaps between rawreads and contigs" />\r\n+ <param argument="--read2ctg_max_overhang" type="integer" min="1" value="500" label="read-contig max overhang" help="maximum overhang of overlaps between rawreads and contigs" />\r\n+ <param argument="--read2ctg_min_aligned_length" type="integer" min="1" value="5000" label="read-contig min aligned length" help="minimum aligned length of overlaps between rawreads and contigs" />\r\n+ <param argument="--read2ctg_min_coverage" type="integer" min="1" value="3" label="read-contig min coverage" help="minimum coverage of links between rawreads and contigs" />\r\n+\r\n+ <param argument="--min_contig_length" type="integer" min="1" value="500" label="min contig length" help="minimum length of bridged contigs" />\r\n+ <param argument="--window_size" type="integer" min="1" value="1000" label="window size" help="threshold is used to group rawreads that bridge contigs" />\r\n+ <param argument="--select_branch" type="boolean" truevalue="best" falsevalue="no" label="select branch" help="select the most probable branch. default is no." />\r\n+ </section>\r\n+ </xml>\r\n+\r\n+\r\n+\r\n+ <xml name="citations">\r\n+ <citations>\r\n+ <citation type="doi">10.1038/s41467-020-20236-7</citation>\r\n+ </citations>\r\n+ </xml>\r\n+</macros>\r\n' |
b |
diff -r 000000000000 -r 6ee7eb5821f0 necat.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/necat.xml Fri Nov 25 14:24:27 2022 +0000 |
[ |
b'@@ -0,0 +1,440 @@\n+<tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">\r\n+ <description>Error correction and de-novo assembly for ONT Nanopore reads</description>\r\n+ <macros>\r\n+ <import>macros.xml</import>\r\n+ </macros>\r\n+ <xrefs> \r\n+ <xref type="bio.tools">necat</xref>\r\n+ </xrefs>\r\n+ <requirements>\r\n+ <requirement type="package" version="@TOOL_VERSION@">necat</requirement>\r\n+ </requirements>\r\n+ <command detect_errors="exit_code"><![CDATA[\r\n+ ## helper function\r\n+ #def make_filename($i, $input_param)\r\n+ #set ext = $input_param.extension\r\n+ #if $ext == "fastqsanger"\r\n+ #set $ext = "fastq"\r\n+ #end if\r\n+ #set filename = "reads_" + str($i) + "." + $ext\r\n+ #return $filename\r\n+ #end def\r\n+\r\n+ ## push each input file and everything in input collections into read_list.txt\r\n+ #set i = 1\r\n+ #for input in $input_fastqs\r\n+ #set filename = $make_filename($i, $input)\r\n+ cp \'$input\' $filename \r\n+ && echo $filename >> read_list.txt &&\r\n+ #set i = $i + 1\r\n+ #end for\r\n+\r\n+ ## #for $i, $input in enumerate($input_fastqs):\r\n+ ## #set filename = \'reads_${i}.$input.ext\'\r\n+ ## ln -s \'$input\' $filename &&\r\n+ ## echo $filename >> read_list.txt &&\r\n+ ## #end for\r\n+ \r\n+ ## necat commands\r\n+ necat correct \'${job_configfile}\' \r\n+ #if $assembly.should_assemble == "yes":\r\n+ && necat assemble \'${job_configfile}\'\r\n+ && necat bridge \'${job_configfile}\' \r\n+ #end if\r\n+ ]]></command>\r\n+ <configfiles> \r\n+ <expand macro="job_conf" />\r\n+ </configfiles>\r\n+ <inputs>\r\n+ <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). To select more than one file or collection from your history, use the \'ctrl\' key" />\r\n+\r\n+ <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" />\r\n+ <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" />\r\n+ <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq. " />\r\n+ <conditional name="assembly">\r\n+ <param name="should_assemble" type="select" label="Assembly">\r\n+ <option value="no" selected="true">Don\'t perform assembly</option>\r\n+ <option value="yes">Perform assembly on corrected reads</option>\r\n+ </param>\r\n+ <when value="no" />\r\n+ <when value="yes">\r\n+ <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" />\r\n+ <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after briding" />\r\n+ </when>\r\n+ </conditional>\r\n+\r\n+ <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing">\r\n+ <expand macro="overlap_sensitive_options" />\r\n+ <expand macro="consensus_sensitive_options" />\r\n+ <expand macro="overlap_fast_options" />\r\n+ <expand macro="consensus_fast_options" />\r\n+ <expand macro="trimming_overlap_options" />\r\n+ <expand macro="assembly_overlap_opt'..b'scard=DOUBLE discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile. default: 0.01\r\n+--bestn=INT output best n overlaps on 5\' or 3\' end for each read. default: 10\r\n+--genome_size=INT genome size. It determines the maximum length of reads with coverage together default: 0\r\n+--coverage=INT coverage. It determines the maximum length of reads with genome_size together default: 40\r\n+--identity_global_deviation1=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 98\r\n+--identity_global_deviation2=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6\r\n+--overhang_global_deviation1=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 30\r\n+--overhang_global_deviation2=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 6\r\n+--identity_local_deviation1=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 99\r\n+--identity_local_deviation2=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6\r\n+--overhang_local_deviation1=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 10\r\n+--overhang_local_deviation2=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 6\r\n+--identity_local_condition=INT Local filtering conditions. 0 = overlap idenitity < threshold, 1 = overlap idenitity < threshold and query identity >= target identity default: 0\r\n+--local_low_coverage=INT If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25\r\n+\r\n+|\r\n+|\r\n+\r\n+*fsa_assemble*\r\n+\r\n+| Constructs contigs from filtered overlaps\r\n+| *Contig Assembly Options*\r\n+| \r\n+\r\n+--min_length=INT minimum length of reads default: 0\r\n+--min_identity=DOUBLE minimum identity of overlaps default: 0\r\n+--min_aligned_length=INT minimum aligned length of overlaps default: 0\r\n+--min_contig_length=INT minimum length of contigs default: 500\r\n+--select_branch=BOOL select the most probable branch default: "no"\r\n+--max_spur_length=INT branches less the threshod are treated as spurs default: 50000\r\n+\r\n+|\r\n+|\r\n+\r\n+*fsa_ctg_bridge*\r\n+\r\n+| Bridges contigs using input long raw-reads\r\n+| *Contig Bridging Options*\r\n+| \r\n+\r\n+--read_min_length=INT minimum rawread length default: 5000\r\n+--ctg_min_length=INT minimum contig length default: 500\r\n+--ctg2ctg_min_identity=DOUBLE minimum identity of overlaps between contigs default: 95\r\n+--ctg2ctg_max_overhang=INT maximum overhang of overlaps between contigs default: 100\r\n+--ctg2ctg_min_aligned_length=INT minimum aligned length of overlaps between contigs default: 2000\r\n+--read2ctg_min_identity=DOUBLE minimum identity of overlaps between rawreads and contigs default: 80\r\n+--read2ctg_max_overhang=INT maximum overhang of overlaps between rawreads and contigs default: 500\r\n+--read2ctg_min_aligned_length=INT minimum aligned length of overlaps between rawreads and contigs default: 5000\r\n+--read2ctg_min_coverage=INT minimum coverage of links between rawreads and contigs default: 3\r\n+--min_contig_length=INT minimum length of bridged contig default: 500\r\n+--select_branch=BOOL select the most probable branch default: "no"\r\n+--window_size=INT threshold is used to group rawreads that bridge contigs default: 1000\r\n+\r\n+|\r\n+\r\n+\r\n+ ]]></help>\r\n+ <expand macro="citations" />\r\n+</tool>\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 6ee7eb5821f0 test-data/test1.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fa Fri Nov 25 14:24:27 2022 +0000 |
b |
b'@@ -0,0 +1,164 @@\n+>SRR7477814.1.32925 1aa2d72-91ef-48d9-950c-cb47fa132b5d_Basecall_1D length=542\n+TTGGTACTTCGTTCAGTTACGTATTGCGGGTCTTGTGTCCCCACTTGGGGTTTCGCATTTATCGTCGCGAAACGCTTTTCGCGTTTTTCGTGCGCCCACTTCATTGGCACCAGTCGGTATTGGTAGTCGTGAATATGACATTTGTGCAGGGCTATCGATAAAACTCGAGTGAACTCAAAATTCCGGTGCAAACAGACGGGCGAAACACTGAAGATCAACATTCTGATCACGAGCATTCCGGAAGATCGGCAGAAGAATGGCTGGGATCATTGGATTAATTTACTCAAATAAAGTATATACAGATCGATTGCGATCACTGTGGATAACTTTATCGGGAAGCTTGGATCAACCGGTAGTTATCAAAGAACAACCGTTGTTCAGTTTTGAGTTGTGTATAACCCCTCATTCTGATCCAACGCGGTCGGGGATCACCGATCATTCTGATTGATCCTTTCCGGGTTGTTGATCTTAAAACCCGGATCCTTGGTATCCACAGGGCAGTACGATCTAATAAGAGATAACAATAGAACAGATCTCTACCA\n+>SRR7477814.1.22516 6931c7d-1ac3-4ddd-b75c-778274821323_Basecall_1D length=5075\n+TTGGTATTACTTCGTTCAGTTACGTATTGCTGGTCTTGTGTCCCAGTTACCGGGTTTTCGCATTTATCGTGAAACGCTTTTCGCGTTTTCGTGCGCCGCTTCAGTGGATTCTGCTCAACTTTGTCGGCGAAAGACACAGGATCACGGGTATTAAAGAAGATCTATTTATTTAGAATCTGTTCTAAACGTTATCTCTTATTAGGATCGCACTTTTAACCCTGTGGATGCTGGGGATCGGCTTTTAAGGATCAACAGCACGGAAAGGATCATTAACTGTGAATGATCGATCTTGGGCCGTATAAGCTGGATCAGAATGAGGGGTTATACGCAACTCAAAAAGCGCTGAACAACAGTTGTTCTTTGGATAACTACCGGTTGATCAAGCTTCTGACAGAGTTATCCACAGTAGATCGCGCGATCTGTATACTTGGTAAATTAATCCACGATCCCAGCCATTCTTCTGCCGGATCTTCCCGGAATGTCGTGATCAGAATGTTGATCTTCAGTGTTTTTCGCCCGTCTGTTTTGCACCGGAATTTTTGAACCGGCCTCGAGTTTATCAGTAGCCCCACAAAATGTGTCATATTCGACTACCAATACCGGTGCGCCAAACGGACCGCCAGAAAGTCGGGCTTCTGTTCCTGCAATAGCCATAGAAAGGAAGGTTGTCGGAATATCTCCGGCACCGTGGGTGGAATGATAACCAGCCCAGATCCCGAGGCAGATAAATCTTCAACAGCGGACCGTGCAGCGTTTCTGGTGGTGGTTCTCTTCAGCTTTCAGCGGGTGTTCTGCTACATGTTGGCACCGCCGAGGTGCTACCGCTGATAAAGTTATATCACCATAAACCGCCACTAACCTTTATTAAGAGTGGCGTATTGTACCTTTAATGAACGTTAGGATACTGTGGAAAATTATGGGATTAAAAAGCCGATCAGAACTGATGAAACGCATGATCAGGTTCTGCAGGACGATCAGTAATCTCGGTGGACTGGTTCATCAATTGTTTTGGATCTTGTTGATAAGTACATGCTGGAGAGCATCGATCGAACAGCACATCACTTTTATAAAGATGCTGTAGTGGCCGGTGGTGTAGTACGCCTCGGTCACTTCATCGAACTTTCCAGCTTTGCCAGCGCGGAAAGGGTAGTCTTTGGCGCTCTTTAATATAATGCCGATAAAGCAGCCTACGTCATAACCGGAAAGCTGCTTCGGGCTGATTTATCAATACGCGCCCGAAGTAATGATCCCCGCCGCAACTTCATTTTTCTCTACTCGAACGTGAATCGTCCCCGGACTGACGCCAAATTGTTTCGCCAGTTCGGCGTAAGCAGTGCGCACGTGCCATTAATGCTTCAGCGGATGCCACGGTCCGTTGTCGATAAAGATAATTTTTCCGCAAGTTTTCTTATGCGGATTGATGATTCATTCTATTTTAGCCTTCTTTTTAATGAATCAAAAGTGAGTTAGGCTTTTATTGAATGATTATTGCATGTGTGTCGGTTTTGTTGTAATCATAACTTATCGGACGCAAGGTATAAAAATGAAAACCGCTTACATTGCCAAACAACGCCAAATTAAGCTTCGGTAATCTCCGCACCGTCAACTGGAAGAACGTCTGGGGCTGATCGAAGTCGGTACCGATTCTTAGCCGTGTGAGGATGGCACGCAGGATAACTGGTCGGGCTGTGAAAAGCGGTACAGGTAAAAGTGAAAAGCTCGCCGCAGTGCCCAGTTCGAAGTGGTTCATTCACTGGCGAAGTGAAACGTCAGACCTTAGAACAACGACTTCAGCGCGGGCGAAGGGCTGTACACGCACGCGAAAAACCCACTTCGCCGATGAAGACCGTCTTTCTCCGTTGCACTCGGTCTATGTTGACCAGTGGGACTGGGAACGCGTAATGGGCGACGGTGAGCGGTAATTCTCGACGCTCTGAAAAGCACAGTAAGAGGCGATTTGGGCGGCGAATTAAAAAGCAACCGAAGCTGCGGTTATGGCGAAGAGTTTGGCTTTGGCACCATTCCTGCCGGATCAGATCCACTTCGTACACTGGCGAGTTACTGTCGTTATCGGATCTTGATGCCAAAGGGCGTGAGCGGGCGATAGCGAAGATCTTGGTGCGGTATTCCTTGTCGGGATTGGCGGCAAGCTGAACGTGGTCATCGCCACGACATTACGCGCACCGGATTATGATGACTGGAGCACCCCGTCAGAGCTGGGCCATTGCGGTCTGAACGGCGATATTCTAGTGTGGAACCTGTACTGGAAGGTGCGTTGGCTTTCTTCCGTAGGGGATCCCGCGTGGATGCCGACACGTTGAAGCATCAGTTGGCGCTGACCGGTAGTGAAAAGATCGTCTGCAGCTGGAATGGCATCAGGCGTTGCTGCGCGGTGAAATGCCGCAGACCATCGGCGGCGGTATCGACCAGTCTCGTTTGACCATGCTGCTGCTGCAACTGCCGCATATCGGCCAGGTCAATTAACTGGGTATGGCCAGCTGCTGTTCGCGAGAGCGTCCTTCTCTGCTGTAATAATTTATCGCCGCCAGCGTCTGAGCGAGCGGCTTCGCATCCGGTATCAAAGCGCCAGATATGATCGAAAATGCGCATGATGCCGGGTTTATAGGTATTACCGACATCGCCACGGCATGAAAGCGATACTGATGTACCCGCTGCAGCTCTTTCACTTTACTCGTCACGTCGTCAAAGGCAACCGCTGGGCGATAAAATCAGAAATCACCACCGCATCGGCATCAAACCATTCCCTGCTTTGCAAGCGTTCCATAATGGCGCGAAAACAACTGGCCAAGATCGGTGCCGCCGCAAGGCGCTGCTGGCAAAAACGGATTGCTTGTTCGATGCCTTGTGGGCGCAAAGCTCATAACGGACGATCTCGGTGGAAAATAGCGCCAATATAACAGCGCCGGTTTTCAGCGAGAGCAATAACATCAAAGCCGGGCAGGTACTTTCGCGCACTGTTCATTAAAGCCGCCCATTGAGCCGGAAGTATCACGCAGACAATAAACGGTCCGCGCGGCTGTTCGTCGTAATCTTTATGCCACCGGGCGTTCGAACTTTTCACCACGACTCACCGTGCAGGCGATGGTGAGCAACTGTTTTCCACCAGCCGACGGTAAAGCTCGCATACTCCAGTTCCGTTATCCTAGTGTCGCCAGTTCTGGCGGCAGAAGACATGAAATATCATCGCTTTGTTGCGGACCATCAGCATCCAGGAACCGTCGCCGGTTCGCGCACCATGGTGCGGAAAGGTTTCC'..b'GGGCAACCCACGCGTTTGGCGATGCGGTCTTCCCGAGGTAAGGGTGCTTATATTCCAGATTACCGTGTGCAGGTGACCAATCTTTGCGCATGGTTTCTGCATAAGGACAGCTGGCCGCCATACGATCCTGCGATTACGCATTTTGGAAGCGGCGACCATCTCCATCGCTTTAGTGATCTTTTGCGTGTTCTGGACACGATCTTACTACGTATCTCTTTTGCGCCGACCAGGCTTCTCCTCAATGCCTTGCGGCCTGCCCTACGAGCAAGCCGCCAGACGTTACCCGGAAGTTGGGTTGCTTTGAAGGAATCGAGGATGCCTTTCAGCTTGCCTTCGATTTCGTCGTTGTAACCACCGGTCTGGACGATCTCTTGCATCAACGGGCGCAGTCACGGTCGACGTAAGCCAGCAGAGCGGCTTCGAAGCTGCCACTTCGACAGTTCAACATCCGCCGGTAACCGTTCTGCTGCGAACAGAGCCAGAGACTGCGCTGCGCAACGGACATCGGCGCATACTGTTTCTGTTTCAACCCAGTTCGGTCACTTTTTGACCGATGGTCAAAGCTGCTTACGTGTTGCATCGTCAAAGGTCCGGATGCAAACTGAGAGAACGCTGCCAGTTCACGTACTGTGCCAGAAACCGGTACGGATACCACCGGACAGTTTTCATGATCGCGGTCTGTGCTGCCGCAACACGGGATACGGAAATGCCCGGGTTAACCCGCAGGACGAATACCGGCGTTGAACAGGTTGGTTTCGAAATCTGACCATCGGTAATGGAGATTACGTTTGGTCCGGAACAGGCAGAAGCGTCACCCAACGAGTTTTCGATAATCGGCGGCGCGGTCAGTGAACCGGTTTCCCTTTCACTTCACCTTTGGTGAAAGCTTCAACGTATTCGGCATTAACACGCGCAACACGCTCCAGCGAACCAGCGAGTGGGCAGAATACGTCGCCAAAGAATGCTTCGTCACGGCGGACGACGGAGCAACAGCGAGATCTGACGGTAGCAACAGCCTGTTGGGCAGATCATCGTAAATGATCGGCGCATCTTCACCGCGGTCACGGAAGTATTCGCCCATTGCGCAACCGGCATACGGTGCCGGGTATTGCAGTGCAGCGGATTCAGACGCGGTTGCTACCACAACGATGGTGTTAGCCAGTGCCATTCTCTTCCAGTTTACGTACCACGTTAGAAATGGTGGACGCTTTCTGGCCGATAGCGACATAGATACATTTGATACCGGAATCGCTGGTTGATGATGGCATCGATAGCCGAATTCTGCAGTTTCTCAGTCTGACGGTCACCGTCGATCAATTCGCTGACCACGACCGGTGGGATCATGGAGTCAACGGCTTTATAACCGGTCTGTACCGGCTGATCTGGGACTGACGTTCGATAACGCCCGGAGCGATTGCTTCTGGCCAGAAGCCGTCGTGATCAGCGGACCTTTACCGTCGATTGGTGCAGGGTGTTTCCACACGGCCCAGCAGGCTGACCTTGAACTTCCGGGGATACGGCCAGTACGCCTTTCATGCCTTCGGCAAAGGTCAGCATCTGGACCCACAACAACCGCACCAACAGAGTCGCTCGAGTTCAATGCGATAGCGTAACAGTTGCCGGCAGGAGATCATTCACCCTCCTGCATACAATCAGCGAGACCGTGAATGCGGATAACACCGTCACTTACAGAAACAATGAGTACCTTCGTTGTGAGAGCTTCACTCACAGCATTGAATGAGCAATGCGCTGCTTGATCAGTTCGCTGATTTTCGGTGGAATTCAGTTGCATGCTCGGTCCCTTAAGACTGCAAGACGTCACTGCAAGGCGCTCAAGACAGCCGCGTACGCTGCCATCAATGACCATATCACCCGCTCGGATGATAACGCCTGCCATTACAGACTTATCGATTTTACGAATTCAAAGCTGCTTTTGCGTGACCAGACGTTTTCCATCGCAGCAGAATTTTCGCGAGCTGTTGTTCCACTCGGTGCGGCAGCGGAAATGACGTCTACCTCAGCGGTAGCCTCTCACGGCACGAAGGTGAATAAACAGCAAAACCATCCGGGGCGCGTTAGGCGACCATTTTCGTATTGCGAATAGGTTCAATGCGTTTCGTCAGTTGCTCACCACAAGCTGCGATAAACGGCTCGGCAGCGTTTCCACCCGGCGCAAGCGCGCCAGAGAAATCTGCCATTTGTTCGTTTGGTTACCTCGGCGGCAAACGCCAGCATGTCTGCCAGCGTTCTACGCTTTGGTGTTCGACGGCAAAGTCAAAAGCTGCTTTGGCGTAGGGGCGAGCTACCGTAATAAATTCGAGACATCAGCCCCTCCCCTCCTTACAGTTCAGCGACAAGTTTGTACGATGTCGCTGTTGGCAACCTAATCCACGGAACGTTCGAGTGATCTTCTCGGCAACAACAGCGGGGATAGCAACTTTATACACCCAATCTTCCGCGAGCACGTTTACGCTCGGCTAATTTCCGCCTGCGCACGGGCCACGATTTTTAGTACGTTCCTGTTCTGCCTCAGCTTTCGCTTCGTCAGAATCTGCGAGCGGCGTTTGTTCGCCTGCTCGATGATTACACAGATACTTTCGCTTTTTCAGCTGGTCGAGTCGCGCTGGCGCTGCAGGTCAAGGTCATATGTGCTCGTTCTGCGGAAGCAAGGCCGTCAACAATTTCTTTTATCGACGTTTTCGATGGCTGCCAATGGCGGCCATGCGTACTTCATGCCAGAACAGAACAGACAGGGACAAACGCGATGGCGGCGAGGATTGTTGCGTTAAGATTCACAGCACAATGCCTCTATTTAGTTAACGTTCTGATATTGCTCTTCGTAAAAGCAACACTGCGACGCGAACATCGCAATACAGACGGGCCTACAGCGATCATCGGGATAGCATCCACCAGACCCATAACGATAAAGAACTGAGTACGCAGCAGTCAGATCAGGTTGACGCCTTTCTTCGGTACCCCGGGATGCCATTACCGATCGCAGCACCGGTGCCGCCAGACCCATCATCACAGCGGCAGCCATGTACGAATCCATATTCAGGTTTTCCGCGTGATCTCCAGTTTGTTTCAGTTAAAACGTAGTAGTGTTGGTAAAATTAATGTTCTTCCAGACGCCATCGACAGATAGACGATCGTCAGAACCATGAAGATGAAGGCTTGCAGCGTAATGATCAGGATGTGGAAAATGGCCCACGGCATTCAAGTTTCTTGACCACCACGGCAACAGACCAGCAATCAGAATGAAAATCAGCTCACCGGCATACATGTTGCCGAACAGTCACCAAACCGAGTGAAACTGGTTTGGACAGCGAAACTACCCTTCAAGGATTAAGTTGGCAAGGAATGAACGCGAGTGATTGAACGGCTGCAGCGTCAACTCTTTCATTGAAGCCGCCGATGCCTTTCATTTTGATGCTGTAGAACAGAATCAGGATAAATACGCCCAGTGCATAGACGGCGTTACGTTTCGTCCGCAGACCGGAGACACACGCAGTGCAGGCGAACCCAGTACATGTTCAGCAATGTGTACGGCAGCAGGTCGATAGGCAGTAAATCCATCAGTTCATCGGAATACCCAGACGAGATCGTCAGGGCCAGCGGAGCAATCAGCTTGCTTTTGCCATGGTACATGTCTTTCACGCTACCATTAACAAAACCGTCCTGTAGCAACGCTCAATCGGTCTGGAAACTTACACGGCACGCTGGTCGCCTTTTTGGCTACCTTACGGAATAAAACCGGGAACAACAGACCCAGCACCGCCGAAGAACATAGATCAATATTGATTGTCCAGAAGGTGGCTGGGGGTTTGTGGATCCACCAGCGAGAATATTACGCAGGTCAGCTGAAGGTTATTCGGGTGGTGTCTATGTAATCCTGCGGCGTCATATTTTCTGAAGCCATGATGCCTTTTACCTTTGTTGTTAATTACAGCCGGTGCCAGTATCTGAACCACCAGCACCAAAACCACGTAACGATCGCG\n' |
b |
diff -r 000000000000 -r 6ee7eb5821f0 test-data/test1_head.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_head.fastq Fri Nov 25 14:24:27 2022 +0000 |
b |
b"@@ -0,0 +1,200 @@\n+@SRR7477814.1.32925 1aa2d72-91ef-48d9-950c-cb47fa132b5d_Basecall_1D length=542\n+TTGGTACTTCGTTCAGTTACGTATTGCGGGTCTTGTGTCCCCACTTGGGGTTTCGCATTTATCGTCGCGAAACGCTTTTCGCGTTTTTCGTGCGCCCACTTCATTGGCACCAGTCGGTATTGGTAGTCGTGAATATGACATTTGTGCAGGGCTATCGATAAAACTCGAGTGAACTCAAAATTCCGGTGCAAACAGACGGGCGAAACACTGAAGATCAACATTCTGATCACGAGCATTCCGGAAGATCGGCAGAAGAATGGCTGGGATCATTGGATTAATTTACTCAAATAAAGTATATACAGATCGATTGCGATCACTGTGGATAACTTTATCGGGAAGCTTGGATCAACCGGTAGTTATCAAAGAACAACCGTTGTTCAGTTTTGAGTTGTGTATAACCCCTCATTCTGATCCAACGCGGTCGGGGATCACCGATCATTCTGATTGATCCTTTCCGGGTTGTTGATCTTAAAACCCGGATCCTTGGTATCCACAGGGCAGTACGATCTAATAAGAGATAACAATAGAACAGATCTCTACCA\n++SRR7477814.1.32925 1aa2d72-91ef-48d9-950c-cb47fa132b5d_Basecall_1D length=542\n+$%+$'%.46>.;-)%*+../+1)7,*,+0)+*5-,283?==0%*%%*<?1;GE2.1+/2305@6/%&)*0=:862352/1-041>FHE2@E>>7>B*'?BHC<A@-.)&--0&282?8/42+++&)559B=??0./.+1))/8377.-;=745<><33/1..*..6,&%%()25;313-8)2=187>>=CD=8>:CFAGE303B12-.).0:,/1363947A4(*)+)')'%&,;=G=5;;;@-48A7<8205-+--,00,+0??;:,'1/+4&4-4;/?-+-4788.(130,10+&)*+40++*'%-)+//30.&&%'+1C;A62.3:+)-8&-,)+*+188>:AE<@?H7G;<<=F97607+(7D?7?:G84C.278).->FC5>1A30139<>;D;>791-+/5-8793>(7).&&(+*003>,.A<.264479(=?D?@4D2+)/029G,;(,--,8.1/..8.;+@>;+61,.6098')&7(:=8CB-12620,'04,43565+<*+83,++/6;D1>31,1173&+<>B59(%%$&\n+@SRR7477814.1.22516 6931c7d-1ac3-4ddd-b75c-778274821323_Basecall_1D length=5075\n+TTGGTATTACTTCGTTCAGTTACGTATTGCTGGTCTTGTGTCCCAGTTACCGGGTTTTCGCATTTATCGTGAAACGCTTTTCGCGTTTTCGTGCGCCGCTTCAGTGGATTCTGCTCAACTTTGTCGGCGAAAGACACAGGATCACGGGTATTAAAGAAGATCTATTTATTTAGAATCTGTTCTAAACGTTATCTCTTATTAGGATCGCACTTTTAACCCTGTGGATGCTGGGGATCGGCTTTTAAGGATCAACAGCACGGAAAGGATCATTAACTGTGAATGATCGATCTTGGGCCGTATAAGCTGGATCAGAATGAGGGGTTATACGCAACTCAAAAAGCGCTGAACAACAGTTGTTCTTTGGATAACTACCGGTTGATCAAGCTTCTGACAGAGTTATCCACAGTAGATCGCGCGATCTGTATACTTGGTAAATTAATCCACGATCCCAGCCATTCTTCTGCCGGATCTTCCCGGAATGTCGTGATCAGAATGTTGATCTTCAGTGTTTTTCGCCCGTCTGTTTTGCACCGGAATTTTTGAACCGGCCTCGAGTTTATCAGTAGCCCCACAAAATGTGTCATATTCGACTACCAATACCGGTGCGCCAAACGGACCGCCAGAAAGTCGGGCTTCTGTTCCTGCAATAGCCATAGAAAGGAAGGTTGTCGGAATATCTCCGGCACCGTGGGTGGAATGATAACCAGCCCAGATCCCGAGGCAGATAAATCTTCAACAGCGGACCGTGCAGCGTTTCTGGTGGTGGTTCTCTTCAGCTTTCAGCGGGTGTTCTGCTACATGTTGGCACCGCCGAGGTGCTACCGCTGATAAAGTTATATCACCATAAACCGCCACTAACCTTTATTAAGAGTGGCGTATTGTACCTTTAATGAACGTTAGGATACTGTGGAAAATTATGGGATTAAAAAGCCGATCAGAACTGATGAAACGCATGATCAGGTTCTGCAGGACGATCAGTAATCTCGGTGGACTGGTTCATCAATTGTTTTGGATCTTGTTGATAAGTACATGCTGGAGAGCATCGATCGAACAGCACATCACTTTTATAAAGATGCTGTAGTGGCCGGTGGTGTAGTACGCCTCGGTCACTTCATCGAACTTTCCAGCTTTGCCAGCGCGGAAAGGGTAGTCTTTGGCGCTCTTTAATATAATGCCGATAAAGCAGCCTACGTCATAACCGGAAAGCTGCTTCGGGCTGATTTATCAATACGCGCCCGAAGTAATGATCCCCGCCGCAACTTCATTTTTCTCTACTCGAACGTGAATCGTCCCCGGACTGACGCCAAATTGTTTCGCCAGTTCGGCGTAAGCAGTGCGCACGTGCCATTAATGCTTCAGCGGATGCCACGGTCCGTTGTCGATAAAGATAATTTTTCCGCAAGTTTTCTTATGCGGATTGATGATTCATTCTATTTTAGCCTTCTTTTTAATGAATCAAAAGTGAGTTAGGCTTTTATTGAATGATTATTGCATGTGTGTCGGTTTTGTTGTAATCATAACTTATCGGACGCAAGGTATAAAAATGAAAACCGCTTACATTGCCAAACAACGCCAAATTAAGCTTCGGTAATCTCCGCACCGTCAACTGGAAGAACGTCTGGGGCTGATCGAAGTCGGTACCGATTCTTAGCCGTGTGAGGATGGCACGCAGGATAACTGGTCGGGCTGTGAAAAGCGGTACAGGTAAAAGTGAAAAGCTCGCCGCAGTGCCCAGTTCGAAGTGGTTCATTCACTGGCGAAGTGAAACGTCAGACCTTAGAACAACGACTTCAGCGCGGGCGAAGGGCTGTACACGCACGCGAAAAACCCACTTCGCCGATGAAGACCGTCTTTCTCCGTTGCACTCGGTCTATGTTGACCAGTGGGACTGGGAACGCGTAATGGGCGACGGTGAGCGGTAATTCTCGACGCTCTGAAAAGCACAGTAAGAGGCGATTTGGGCGGCGAATTAAAAAGCAACCGAAGCTGCGGTTATGGCGAAGAGTTTGGCTTTGGCACCATTCCTGCCGGATCAGATCCACTTCGTACACTGGCGAGTTACTGTCGTTATCGGATCTTGATGCCAAAGGGCGTGAGCGGGCGATAGCGAAGATCTTGGTGCGGTATTCCTTGTCGGGATTGGCGGCAAGCTGAACGTGGTCATCGCCACGACATTACGCGCACCGGATTATGATGACTGGAGCACCCCGTCAGAGCTGGGCCATTGCGGTCTGAACGGCGATATTCTAGTGTGGAACCTGTACTGGAAGGTGCGTTGGCTTTCTTCCGTAGGGGATCCCGCGTGGATGCCGACACGTTGAAGCATCAGTTGGCGCTGACCGGTAGTGAAAAGATCGTCTGCAGCTGGAATGGCATCAGGCGTTGCTGCGCGGTGAAATGCCGCAGACCATCGGCGGCGGTATCGACCAGTCTCGTTTGACCATGCTGCTGCTGCAACTGCCGCATATCGGCCAGGTCAATTAACTGGGTATGGCCAGCTGCTGTTCGCGAGAGCGTCCTTCTCTGCTGTAATAATTTATCGCCGCCAGCGTCTGAGCGAGCGGCTTCGCATCCGGTATCAAAGCGCCAGATATGATCGAAAATGCGCATGATGCCGGGTTTATAGGTATTACCGACATCGCCACGGCATG"..b'0%6>4/8-*5((-40:8:6=;855.3,+,/1=F>A@2+,44?7,3=@,*,317B431154E618:8,7<,),($*(%..2311564+-+00.,((+,$,%*//?7DE99.,,,//+.0,/68,.74:;6;786:*%\'%*.)00=;5*-.27269-/4;806.-1622(56*100;78835.2,,-;:3:?@0))(.20,**3*34+-0<5,19+64;3:0;26;IB8B123;FA21,.29<@38)%+((+.4(\',*+*+\'0(,(+\'99+*(*0(.45?/=/A1B//2214A<35@@;<?722B5?4635488;/*+*1=BB>7/8//)0,*%)7\'30.?54*($&)%\'+./,.1=>/\'230<--+441:13,)66&1(*1(/2%\'\'/.8:7./*042=?.*.1667:?D?41,6-,,0/10.2))04.9B:;>35;FB?;..0,*,\'/78@51,,&*+AB<>94%/6)$.(((&((+-;>,%(%,+0;?>H@A9,0/41:FA+124//-.22,@8<0(0+0:B036<55::956@9118>1;47-2*,)*),3A3347//57315425:BA1772.4=,&+&+\'+-*+&).34;471)(\'*,$)2776$+\')\')(&&&+--(-(+1-+/3;>1-%.//7+2A00*>GG7.3*5D.38@F;9;74/3440.+/=0*-).%*,%-\'9?@@5577/+\'9?,*\'(\'&));-++).(,++\'&*(/)\'(*&*6>D-&()-/00-<7=83@E1885221/()\'*(((*&&-4.022;?EB))(2.+)%.+.,)&%$)DC>FA<//..1-7E33?9@=4245-///+(\'&%$()-.-4==@?@A4;50)-,)*276<),+20=306;7523?*.\'\'%%(*.*--3,,%@,D;DG64/3:6AACA3C0%*03249@5374126@::87;40(5123/(,++.26;9;6;HHH=078=@18<8<D;61.<-/8.;;>,)($&65AAH@668,46:>61,-)+5>D*25.*+,11052<1D-6&&\'-126<B*\'\'4,1-<8:8:+37858A))6-@<:+/7-52,1,5-3ADAJ<@*-9:889885)92:04-//01787;/,--5;-1@,(\'\'*)9C</%)*.0.1,00/47:>=?8456:88>-/-3*&(*\'\'(6:0*--08FA9=I831,.268C22#\'+658E<111177<C@32$###$).2;=9@?==>B@;G:9355440/%2.04A66./38469285D=H297056602).40*2).047+)&*++/(1+,),2--\'(\'\'(\'))*-<(8+3.)&*,*+/25415..(&/*).2;6,0.50:45F:35/\')2+/241>@9<@C69+4&&*/-5.,0*)&<8,,/264==<-:-\'*96-28)33->02==257<9<+,5(*&)()*/16511=7-/\')+\'2+2..55,-&\'&,%07<>::/),(&&+,1*+.2DC<A6@<1%,12548/0*3,:-+0/*.77:2666203))*-0?/92,;6.0$&(+-()+;=E96878B=.*./,<90/622/+.&..;2309<.).05A?8-)*-0@(*47051(+++**\'112/++.3<8=;H(7/91FGE.BH>;==7*\'+-,/00.=</-3/((77E<*6,02,*)*)+),)(),11GF7123*(&\'*\'*(*\'*&\'\'57;56;8>31323?<@7..&)&\'+3/7.,)-*&(1;</:(\'.@=D-*..--?G6.>CCA981682>=40/,-146\'1<+8,/1.+-*.15@3-0++,221A2BID8?@+081,*--(\'*()(\'42),&(\'&%#\'%%\'%,\'+)\'*\'((,.)-\',-3=&$##&*)935333=)+(%#,\')(\'+(%(&((*$)\'($*\'\'&)***3,+1,1.79;9=1.\'&&\'$($))1.1316-02A60)-3-+)&%\')\'&*+.(*((($$$(&3:8+0(*./-5/-.1+*(*+%.\'/2400+&%,,)/.-&,-2-&,0\'-(77.%\'&&(%)$(*13)+&7//.()+(++\',\'*#(+&,40*#&*(+(/)7<;600)\'/62?/&$$*.+,)\'*\'&()*$+*61*.)61)\'\'.-)&&$($-,*3,69*%%0.+&*&$&&$)++)%\'(22&4001(5>781(-2/B1?31*)+))\'\'\'(&*-.\'**%.$70>965:,*\'\'&%)+(()*-8@53)0,%%$$)\'))*,2418800*-\'02000/=0/*,+5-*\'$,+/@@@,/\',)(02)+(+*(,+((\')$$++)1//45\'*,,+-\'-1423(13*+)\'"%%65;:((%#$#%%&\'++(*(,\',.,%*)\'---/29+,%\'$+,.=64.+.\'*%)(&-2)),*2.(&*&)*,+/0(,.$$$*$%$%$%(%\'&)),;02163/28342..\'\'$&$%(%,(\',825,,)(+&)+&**+-*01--/-/\'(&*+\')%*),0)0.(+)(**()**&%((%*/2,,&-+0/())*(953,,+\'1).)%)\'&%)(*(/-(4;C62((&&.2(&(\'\'$)+(\'1845.,%*\'%()*((++*)/0<*&("%/+/\'0+-,1+)(**&\'\').79/7,*-%)%*-2)*+&&*+&33)\')\'%%&$(\'/181/--,$,(%((&-+1(\'\'\')-:=.12--@6-\'>5)</,)$+*)/\'-.>&/1**.35=6.\',(.12(1?0,/-7?8/&#%)$%,+\'\'*+*3/&&&&(.))+4*\'+\'1<29:,\'((*+,*1,+(*()+,.$(%(\')&)(\'%-0((%(&-,0.)/.3-\'(\'(-4**%)\'\',1*0,((#(+3302/-22-32(-(#(%\')(//\')*-+*)(\'(,,+*\'),\'-:.*&(%&+.-)%)--%/,.+4=.\'%*\'-,.\'.+4+/\'\'(/8+(30.(+*,/**3.-.(%,-**(-./\'\'.)&#\'%+((#\'(\'(1\'&&$(,%&2@8+)$\'*1**\'\'(\'\')&**+%/-1*$\'#)-+6($&&&.,\'%)&&$#$+/($\'&0(-*),/(+$(.+)\'(+\'*&)((+++-,\'.-.,-,\')))(%%\'%\')*$&&%\'/80**&1&(+(\'(%())*)%&)#%\'*,%\'&+*%)\'(02),\')%.*(\')\'(\'*)\'*&\')+,).+(1*+,6/+)#%$*-)\'&#\'#\'(%*%($&($%*&&(1(82&)\'&#()\'-*\')&(--84/6%,$$&))+)$)&$\'%-\'\'%%&+.--4//0(\'&&39.\'*--)*&(&%$+#<**)(\'&&+,0+\'+++060*05)-954*$*&%\'&($%%$&)\'&$$%%$%%%%&,*)(%%&$)-*-\')&\'\')+%(%&\'(()%\'\'\'$(\'-+0/++((\'&901\'*&%(+\'%,*+\'\'--)(\'&\'-48&)%&(/11&1),,7,*()*+06/((%)$&$%#*/)$&$$$$$$$0-0(\'+14((%$&%%*3*&.620+&&)&(&+*\'(\'**&)+/\',-/\'+),%&&%*8**()\'%0<(\'$%&%)%\'\'(-*6%&%$&.,.-\'85.*\'+)#$$&(\'-.&()+2+()%)*-/(+2*((\'(+..&\'.3-2/9/\'+($+&&(*0,-0,)21+)\'*1*$(,-&$&&+/3304=.(*8/*221177,\'&-&\'21\'*\'\'.03+((/,.969+>*%,)\'(\'\'))+*)\'\'$-(362&&.7)$&)\'12*(-&*,0,&*&&,\'>5/1$\'$\'(+*,(%344-(&),/,*\'$&$#$%%\'(&)+\'\'&&+4*+)%&(&)(&*(.\')(,/2..0/**\'*)$/*$%$%\')+,(*&\'(%&+,(%(\'))%$-()*(%%(\'((&*51/5,,\'$$(%%&&%+,()&$()+*9)\'$%$\'$&&\'(*2))&\'$-03/\'0-($$)(()\'&&$$(&,).(),%%%$&&\'&)+.*(\'\'%#&)\'&%*(*&%%$#&%.0-)(+(0*()\'%*((\'%\'$%\'&%\'*,)\'*&,+&%&$%\'\'&(\'&\'((-*+*\'\',-8\'\'+&%((+/&%\'3+&&\'&%%\'./,%(%%\'%\'($\'%\'($&*\'(%$\'\'$,)&&+&(&((2-.%)*\'$$(\'$$(($**$(&#"((#)($(\'$\'(#"&#(-%(%\'&$#\'-%)"&%&#-%%$"%&$#%$#:6\'\'\'$#####$$$#$%%%)&)\n' |
b |
diff -r 000000000000 -r 6ee7eb5821f0 test-data/test1_tail.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1_tail.fasta Fri Nov 25 14:24:27 2022 +0000 |
b |
b"@@ -0,0 +1,65 @@\n+>&,&*(,-;,/-7@7)*+-+,&.2,:225AA=;=;4C462:8@.$,0B/:A/3B<8IF<1-/*,6@65;34-/A91113E50)+*1CC54<B@C=D0+-;-/%&&$$%$&**;:;6/)1),'/353%(%'%')21>4;2,(+)&&&&)*++%&+,6:8<:)BA?@3333020+/<F=&'25,,,*7>)*&1,*++,167><@76670/,)120(*+&398=39BBF>9;0/%*2-2*7664/)%1*202?3+/7/0-.8;:B3BC<<>4>3+12))(*++;:4,6*)000?00,DA00DD<85'+.*3*(0/4*BA18('&/8;D/6+<01-16>C+),7.<DC+64'1&)%'$'&.-4<D:7E69/%)*1;..474:42/5=*:9:3>2=2+/5+B+.0ED:4<6(5(1,-*%&.-55=?:;B/''%$''**%87.129@>65./5--4101'%%)(*,.12-/45/,+,)+8=8:23695?8,.3>6@2-*2-,(8:*+69FCF=7871644A5194,-//-2)6486<80241<+,')66.095+)4-17<=78/,--/?/101,+)'3,(+3=()7;,008?D:,,,-,:F12HH@<GF4<;79&3&'&%$$&&$#+)5>.*0444470$$%'(*/3,636496))$'#&&&+%&-*+668%-*'+-.,,,579D;<;<H;98C280*050:6254';88FG62EHA156=9<=96861/.,&'/*0.5?82'*054,)+)(35?>?C,+)(-9-,,.,:,-,1:2/1003B'-4/:=H89034-'+&.),6&228:7(:(4?+;679/1.90:DA':;9?860'55B945642763(-3(*&$&%(&--))2**-&9GGHIF?B36-1**&=/(+<81/-1+44-/%+04AG7683B2+11G64%/,/4*&&-5137;$'$&%&(&*-=>C2H42.*3-),,3;6<97553=@;>@0//)#'$(%'CGD>,/+%.,**'().%)')'('*,($)%%''&*'+'(,..0.(-71743..2+3./+&'.++7'+7/6775(2-6-<3'$(&%+'$.,-*/99/52.'+.7680,&3-&1,.'')++*&4.6889;CD:1EG72,/:89>6278,/)(((062112857)2969001.+)+,/57E>*976,.60(/2,*,((12/559:?;;???@$*&$&)'&')*/%)*&&(&*--712+-*01-*8+,=00<=,38/.3.(+0-*.','58*:4600,-,((8BF-*/,,106?12A7%/9-038<@3100/0.3/64>4''+'&/,,,/2'8AG8AI4800-2')'*8-(:-).2/'(+0038366:==>6F52374%))+5A=379313('-2%+.),2,71&4;/1;72/0;80&%%%)1*9*()-+/,-)+'+-/*,*-/:;-=-.%(+-@E:588(%64),ABE>=>E+,11),09)+,<4;)..&--/)**7<8;9:+).12:12,()):-,'1;9AG?5*0/,.++,0<?B=1)+1/.//,.0:+88=??==D'2'(-+0+,(+--,+.,;=B/,0344/11/1=430GG3).258313&.?*)''$'(*16/:=07<E:4(),772236A7C8-7038,+//'%&'(60('-)0))+0-.*/5D9>D246,&-&'',-35:02104-<A;/546;001FA1C3=5@2/224C=BA0+,'/@403@A0&9*)(%'(--*-.1G,00/1/@=8A>639E95F0/'>C?FD65.1&3.(/&&''+04:1,*-)(,1<@740+-+-.0;55=3.,-34/+,00*-..4/?6/(',''%(0,&,2/00-((,.:2465,18:7--43<:,+656@8<;@@D3D.4'*+35-;;3;;;&95((-/''+1,.(78CC112B??;=C6'%(,/>10//11/BHB-+'&#(3<A8?)7254>C96IH4;*3-?>431518B6.0/48/<):AD4/))&03*1.-/9-;;;,%0+.16656II9:9>-/93;-)((&)&+8C@5=21-+*+)19886@D>8720:5,-2857;<<AHI=8,0,6.90.3-/0:7;IH:,(&&(+((-4599,*5;--3321>=,14-.283?CF2001('5).0D<5:78?.)'%'&-3<58:@?()/'(**,9?2,/26-(/2(0ECBFA8>57788,/9?=7&>(+'0*<DGH>-/759>HIJF2-/18HE>.84/+,57A'50.-6/3E32*''0:158389B=;;89:0230*14-+'')2)0%6>4/8-*5((-40:8:6=;855.3,+,/1=F>A@2+,44?7,3=@,*,317B431154E618:8,7<,),($*(%..2311564+-+00.,((+,$,%*//?7DE99.,,,//+.0,/68,.74:;6;786:*%'%*.)00=;5*-.27269-/4;806.-1622(56*100;78835.2,,-;:3:?@0))(.20,**3*34+-0<5,19+64;3:0;26;IB8B123;FA21,.29<@38)%+((+.4(',*+*+'0(,(+'99+*(*0(.45?/=/A1B//2214A<35@@;<?722B5?4635488;/*+*1=BB>7/8//)0,*%)7'30.?54*($&)%'+./,.1=>/'230<--+441:13,)66&1(*1(/2%''/.8:7./*042=?.*.1667:?D?41,6-,,0/10.2))04.9B:;>35;FB?;..0,*,'/78@51,,&*+AB<>94%/6)$.(((&((+-;>,%(%,+0;?>H@A9,0/41:FA+124//-.22,@8<0(0+0:B036<55::956@9118>1;47-2*,)*),3A3347//57315425:BA1772.4=,&+&+'+-*+&).34;471)('*,$)2776$+')')(&&&+--(-(+1-+/3;>1-%.//7+2A00*>GG7.3*5D.38@F;9;74/3440.+/=0*-).%*,%-'9?@@5577/+'9?,*'('&));-++).(,++'&*(/)'(*&*6>D-&()-/00-<7=83@E1885221/()'*(((*&&-4.022;?EB))(2.+)%.+.,)&%$)DC>FA<//..1-7E33?9@=4245-///+('&%$()-.-4==@?@A4;50)-,)*276<),+20=306;7523?*.''%%(*.*--3,,%@,D;DG64/3:6AACA3C0%*03249@5374126@::87;40(5123/(,++.26;9;6;HHH=078=@18<8<D;61.<-/8.;;>,)($&65AAH@668,46:>61,-)+5>D*25.*+,11052<1D-6&&'-126<B*''4,1-<8:8:+37858A))6-@<:+/7-52,1,5-3ADAJ<@*-9:889885)92:04-//01787;/,--5;-1@,(''*)9C</%)*.0.1,00/47:>=?8456:88>-/-3*&(*''(6:0*--08FA9=I831,.268C22#'+658E<111177<C@32$###$).2;=9@?==>B@;G:9355440/%2.04A66./38469285D=H297056602).40*2).047+)&*++/(1+,),2--'(''('))*-<(8+3.)&*,*+/25415..(&/*).2;6,0.50:45F:35/')2+/241>@9<@C69+4&&*/-5.,0*)&<8,,/264==<-:-'*96-28)33->02==257<9<+,5(*&)()*/16511=7-/')+'2+2..55,-&'&,%07<>::/),(&&+,1*+.2DC<A6@<1%,12548/0*3,:-+0/*.77:2666203))*-0?/92,;6.0$&(+-()+;=E96878B=.*./,<90/622/+.&..;2309<.).05A?8-)*-0@(*47051(+++**'112/++.3<8=;H(7/91FGE.BH>;==7*'+-,/00.=</-3/((77E<*6,02,*)*)+),)(),11GF7123*(&'*'*(*'*&''57;56;8>31323?<@7..&)&'+3/7.,)-*&(1;</:('.@=D-*..--?G6.>CCA981682>=40/,-"..b')/,:-6/@7,0-833E/4--+&%\'-1D9::5E))/&)*56<-7,--065/4<GC760/03F/A>3845*,&3,-.A7@7C66;C2,6235>>?99>;:8<:19361<=)+1,*&++,340.,81147;83865<B9;;A<?GFC725<.4+3111GHFA>5B..-C3416*\',,/78-4I=-575@7@H@@F22/\'.0/1.;10-@?@BCCEE3/9?9D486:;2+-+((-,AIFFDF85040/23276:5F8B68;60,*&\'&%\'(1%+>=>H*)9948<06>-74.(-,=(9773/,130.-/1/2C=22(=0:99;55JE?783663;EAFDC<AJ=7<;?:98//.8925D3@=88?A1)*7;;=:867.048>ACC>0-.)3(,1/,+,,--7&+&(*5*366/<:9<?<6@79E>+.,*08644**(*,((+&02?1;3>=:E=03/031;A97/,97/,+&)3-/D=67(/97/)++0EBC6/556F@EFGE?E8A8>?:@>7==88:8-?@4,A<@F:1/(-(-/2FJH7DA9-11683+35=;2)-EEA8::-.60.*4(\',367?H<>>BC@848<(7*/B-F0HB>>>DGEC2&-))8@==CAB:E888H1))\'--2037/>1,+27-3)6))+=CJIH<;2/:2CID?+,8C;021;5553&&.>950/)%),*44):0:68@6DB57/;/16<0/.;D(@@@9?5>30-*106<EC9/3/?/93/04;>7<>=BCE=C:@3C76.),,)+.36AJCFD?D>5H54>..-:/2577A/-80=(@(@,0&+$\'\'%0*&)-+8(4)\')(/)*,47C/..1<;B886E@)65=><?8B06>948754/43.4;*3>@=788D678+\'92;9<\'-3.*+468:C=:A@CI>/0,(,.0//24457;6/2/73:;8*++0(+-,\'\'\'(*),,.21.-/5AC=?=:9969-2\'&1*+,9:>:31\'\'-2>14@45<;@B//(.1)65//*&,,32421*\'()++,7/2-<8D97:4\'(%.(\'89;;FE=B@ID?3441\'/78;=99:>8<=0-0/326;91.10,568IH?9I:21<10/:8-;.&/>>ABE@FF>D.0\'$)&*-+64EFF/.-/><8@4.1/12AC:B<F?BC<<6,+672569BFDC:F0,1E/45;21+2305.0<01/54D9248A<A2,/+>0/,289+-715DBF=6:A9,\'.0/3-/.--2014690086&010:7/022:3,-(\'*735595580035568FB?AC999446?)&$1+498;0@?F=:=;&\'.+/226,731*1++)%\'+,53;825,3+406/+/15;FC/>.><AAEADA0#((*@5>6D76EH>>@FDB@D?9ADB@EH:D2,,-,,).74/4/0/857=92344>BCBC72,((13+9***41-*(.,)+%,*$-3==A7A=A:84.6<A8724=/*2@7C>6,7,2-203/)2&\'\'BF2>6/3-10//-(\'\'+*/8>+-(3,///-:2449,=50\'(1(009<;79:;A9032:;::CCEDEID9A909&>*E?))()-*&.2\'/0+%*2,.4-79360<843234350123,34&78?22;8<@BAD>>BBAA@;+:5=)5+)./).9+/4<3(,:3:>>,()(..76:8B=HC3(-*)&+%5.-250AD<AAIGCE@;6?IEH.,+9*3;-.-03-3GA64393<ACAA79910/.(&011:3-..,&\'(,5:;>;84.)-%+3@655858-*,6->@F6<B53*,2++03/52.<?:06/9F6*-4.)->10106(50,.((-0>47*,-//7488:43\'*))0.24:>@E?C12366+,4?7D>??C;=67;E;?D4623.9B6876A3=A+%3665GD?561-))*CIFC3<-2201;<<=<D<A>80<83/(.,5164F9B.,)\'(+.>F56:94--+,(349-,?D/.*)+H@601/(()()\'(\'\'*/47=B>;<=8@H,1(++4))856=1-)+-0;5C@8320&386)8@;7345+42.14C38(*,6,/+02-/4)02@=F;?2*$%#.+34-,,/:///>5,04846@01472,)329GG38&,*6/)2/0>9<5?75/(%&&*+5A;358;9<>A<>A4,/*&)60+49:;724-88=2,:;:<54-./">:5#,\'+))$0*+,:>59<=8<A,+,&,.(&\'47=G7>H:/:9>0:GB5@7I15;8766:(43*.-1487.41(*(-/));15)/02:00.2-/033;GGFGHC:+4793<C:79-<B?2,.-,-:9@1E432,0@H=<6656:>BB<97868:4:90-+338?AA5,A?;--554--6090+,\'02EE134006>?70B<--...//10&((),1<8?:6..()*..B//.66ADB=C=9+&&**,.36H?;-3)-,(:<3</5:,;C<63(%(&*\'-43&+*/%6)**(..378)%+)\'%(\',48@3-78ABB?43-)6-B:<F00?.85\')&&2C@B8334:GJHH.2.6:==@E1,-.4D56702,/%1/5FC=.//0,+(#%\'+,171145*+84D543DC19D>36:/8BDC::873B4//(++*-($+,C@:423CF;.=.1122443-(.047C+5B@B6*)415/-,-.<199?D;A?A=C@DEEBDA>;56-1,*$%,;3>BCC:E/)692:IC:6DB,)+%.0C7<FJ<77666E;:4)(*.-19>?AAHBG>A53+))(*,84A-.3?CA270>.\'&0,26C67<BIGE@97//91/0%&(++5,7435D65.D/<0013:1()**--29GB5<ACE@6@F?669<?;=?@?@??AA=;?767<<@1---<:-(*)&\'+0../.,.+-0-;47?A:<0,2/802.;%*)/+)+0*,,,-./GHF2HE?@&+-01H@88=5<>=JA58@FFC@<.52\'50>6/,*,.A?D:=1(,*.28898254;2/-.)-&%,))-010).\'*-+&.;=;BBD4(1*);(\')+,00,17:;B=1<6<F>4D41+B?34;<FG<\'(*68,<.,,=4-292--()0;;@:;;?44-7A7EH<22()\'((&.?6G423=IJ?AIFF3?2E3HG@<.146553;@BI???<CC0>1>2<==IDD4F,,.CD?ACBDB@<</;B++(%)8:=>8:E442>=AA5062).45<>6939:),)/..8>@=4536-7754?E?7?EA;38A:C.().2136?@0(0/E+*?,+.,0.44,68=8922.\',06048CI?A..0D=;CII.,+)088;7=<>;93:697::A@8>E:<GB&@43)9*GG>E,-/+/==H:84473BD7=<:)-5+02)\'\')3?31=B5=22*H+907976)5.2/99GBCEG@.2(&%)&),@<=G3C1/1FE0<D@B968577?GA7(:,+:01AC7;@,,+&*/8E44<8II;4479/0007226==>=A3E*,CGEFG1.+0-37+)-5)*$$+.00)/=6=3*.01D12--03137:=71.2)+14-/-:7<8\';*,,*10*)=4D=/2412567E@F+841;-+/)-.1;;5123-5+10EDE;:>A78?@HD7A938A19=@?=A?-91:5-00-%*+**-(+,?:BB7EHGGD>126/+-07FA3(*),5++;GI1+..%**)00@.8,1-6A-,%\'&)/-/0,8*1)*.GC6:3.*3*\'%$$$%&.-)*)+1@010C6?62(+(&*040/+1214.0&*6/41.\',,4=GB-679798&(,+22AAE@9A?56530\'.-=;==;<;<@10327;?3?9341-5@?60*.\'+++4*.2;437@49+.()1)-929===CIHD*?B9*0--245/.B29;:03..66%?\'\'(9:61?2A++++-6<76/34:@E*?65<CC@;=03=>?7F6143:;AE:443-.)281121134,520/33<9984>?9+62/?/+7;48>;69800005:3>65+,$\'(01C@?BCHI><<68643?2A=-8>34*2+7.--++&&%\n' |