Repository 'smalt_map'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/smalt_map

Changeset 0:77cc50d982c0 (2017-09-19)
Commit message:
planemo upload for repository https://sourceforge.net/projects/smalt/ commit 008f4667b70be22e9ddf496738b3f74bb942ed28
added:
smalt_map.sh
smalt_map.xml
test-data/ecoli_1K_1.fq
test-data/ecoli_1K_2.fq
test-data/output.sma
test-data/output.smi
b
diff -r 000000000000 -r 77cc50d982c0 smalt_map.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smalt_map.sh Tue Sep 19 16:40:17 2017 -0400
[
@@ -0,0 +1,58 @@
+#/bin/bash
+
+
+smi=$1
+shift
+sma=$1
+shift
+#get format type so we can do extra work if it is a bam file
+format=$1
+shift
+
+#get working directory so we can find the output files 
+CUR_DIR=`pwd`
+
+
+cp "$smi" "$CUR_DIR/temp.smi"
+cp "$sma" "$CUR_DIR/temp.sma"
+
+#determine if we have 1 or 2
+num_inputs=$1
+shift
+
+inputs=()
+
+#determine how many fasta/fastq were given. Needs to be provided by user
+if [ $num_inputs -eq 1 ]; then
+    inputs+=$1
+    shift
+elif [ $num_inputs -eq 2 ]; then
+    inputs+=$1
+    shift
+    inputs+=' '
+    inputs+=$1
+    shift    
+else
+    exit 1
+fi
+
+smaltout=$2
+
+smalt map $@ 'temp' $inputs
+
+
+if [ "$format" == "bam" ]; then
+ if [ $GALAXY_SLOTS ]
+ then
+     samtools sort -@ $GALAXY_SLOTS $smaltout 'temp2'
+ else
+     samtools sort $smaltout 'temp2'
+ fi
+ mv 'temp2.bam' $smaltout
+fi
+
+#remove index files
+rm "$CUR_DIR/temp.smi"
+rm "$CUR_DIR/temp.sma"
+
+exit 0
b
diff -r 000000000000 -r 77cc50d982c0 smalt_map.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smalt_map.xml Tue Sep 19 16:40:17 2017 -0400
b
b'@@ -0,0 +1,340 @@\n+<tool id="smalt_map" name="smalt map" version="1.2.0" >\n+    <description>Map query reads (FASTA/FASTQ) format onto the reference sequences</description>\n+    <requirements>\n+        <requirement type="package" version="0.7.6">smalt</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:"   level="fatal"   description="Unknown error" />\n+        <regex match="Command line error"\n+               source="stdout"\n+               level="fatal"\n+               description="You cannot do that!! What were you thinking!" />\n+        <regex match="ERROR"\n+               source="stderr"\n+               level="fatal"\n+               description="You cannot do that!! What were you thinking!" />\n+    </stdio>\n+    <command>\n+        bash $__tool_directory__/smalt_map.sh $smi $sma $oformat.outformat\n+\n+        #if $singlePaired.sPaired == "single":\n+              1 $singlePaired.sInput1\n+        #elif $singlePaired.sPaired == "paired":\n+              2 $singlePaired.pInput1 $singlePaired.pInput2\n+        #elif $singlePaired.sPaired == "collections":\n+              2 $singlePaired.fastq_collection.forward $singlePaired.fastq_collection.reverse\n+        #end if\n+\n+          -o $output\n+\n+        #if $oformat.outformat == "sam":\n+          #if $oformat.samOptions:\n+             -f "$oformat.outformat:$oformat.samOptions"\n+          #else\n+             -f "$oformat.outformat"\n+          #end if\n+        #elif $oformat.outformat == "bam":\n+          #if $oformat.bamOptions:\n+             -f "$oformat.outformat:$oformat.bamOptions"\n+          #else\n+             -f "$oformat.outformat"\n+          #end if\n+        #else\n+          -f "$oformat.outformat"\n+        #end if\n+\n+\n+\n+          -n \\${GALAXY_SLOTS:-2}\n+\n+        #if $singlePaired.sPaired != "single":\n+          -l $singlePaired.pairtype\n+        #end if\n+\n+\n+        #if $mincover:\n+          -c "$mincover"\n+        #end if\n+\n+        #if $scordiff:\n+          -d "$scordiff"\n+        #end if\n+\n+        #if $insfil:\n+          -g "$insfil"\n+        #end if\n+\n+        #if $insertmax:\n+         -i "$insertmax"\n+        #end if\n+\n+        #if $insertmin:\n+          -j "$insertmin"\n+        #end if\n+\n+        #if $minscor:\n+          -m "$minscor"\n+        #end if\n+\n+        #if $minbasq:\n+          -q "$minbasq"\n+        #end if\n+\n+        #if $seed:\n+          -r "$seed"\n+        #end if\n+\n+        #if $sw_weighted:\n+          -w\n+        #end if\n+\n+        #if $search_harder:\n+          -x\n+        #end if\n+\n+        #if $minid:\n+          -y "$minid"\n+        #end if\n+\n+    </command>\n+\n+\n+    <inputs>\n+        <conditional name="singlePaired">\n+            <param name="sPaired" type="select" label="What is the library type?">\n+                <option value="single">Single-end</option>\n+                <option value="paired">Paired-end</option>\n+                <option value="collections">Paired-end Collections</option>\n+            </param>\n+            <when value="single">\n+                <param name="sInput1" type="data" format="fastq" label="Single end illumina fastq file" optional="false"/>\n+            </when>\n+            <when value="paired">\n+                <param name="pInput1" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/>\n+                <param name="pInput2" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file"/>\n+                <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library">\n+                  <option value="pe">Illumina paired-end (short inserts)</option>\n+                  <option value="mp">Illumina mate-pair library (long inserts)</option>\n+                  <option value="pp">Mate-pair sequenced on the same strand</option>\n+                </param>\n+            </when>\n+            <when value="collection'..b" format. &#060;ouform&#062; can be either 'sam'(default),\n+     'cigar', 'gff' or 'ssaha'. Optional extension 'sam:nohead,x,clip'\n+     (see manual). Support for BAM format is dependent on additional\n+     libraries (not installed).\n+\n+  -F &#060;inform STR&#062;\n+     Specifies the input format. The only available format is fastq (default).\n+     Support for BAM and SAM formats (see: samtools.sourceforge.net) depends\n+     on additional libraries (not installed).\n+\n+  -g &#060;insfil STR&#062;\n+     Use the distribution of insert sizes stored in the file &#060;insfil&#062;. This\n+     file is in ASCII format and can be generated using the 'sample' task see\n+     'smalt sample -H' for help).\n+\n+  -H     Print these instructions.\n+\n+  -i &#060;insert_max INT&#062;\n+     Maximum insert size (only in paired-end mode). The default is 500.\n+\n+  -j &#060;insert_min INT&#062;\n+     Minimum insert size (only in paired-end mode). The default is 0.\n+\n+  -l &#060;pairtyp STR&#062;\n+     Type of read pair library. &#060;pairtyp&#062; can be either 'pe', i.e. for\n+     the Illumina paired-end library for short inserts ( \\|&#8212;&#062; &#060;&#8212;\\| ). 'mp'\n+     for the Illumina mate-pair library for long inserts ( &#060;&#8212;\\| \\|&#8212;&#062; ) or\n+     'pp' for mates sequenced on the same strand ( \\|&#8212;&#062; \\|&#8212;&#062; ). 'pe' is the\n+     default.\n+\n+  -m &#060;minscor INT&#062;\n+     Sets an absolute threshold of the Smith-Waterman scores. Mappings with\n+     scores below that threshold will not be reported. The default is\n+     &#060;minscor&#062; = &#060;wordlen&#062; + &#060;stepsiz&#062; - 1.\n+\n+  -n &#060;nthreads INT&#062;\n+     Run smalt using mutiple threads. &#060;nthread&#062; is the number of additional\n+     threads forked. The order of the reads in the input files is not preserved\n+     for the output unless '-O' is also specified.\n+\n+  -o &#060;oufilnam STR&#062;\n+     Write mapping output (e.g. SAM lines) to a separate file. If this option\n+     is not specified, mappings are written to standard output.\n+\n+  -O     Output mappings in the order of the reads in the input files when using\n+     multiple threads (option '-n &#060;nthreads&#062;').\n+\n+\n+  -p     Report partial alignments if they are complementary on the read (split\n+     reads).\n+\n+  -q &#060;minbasq INT&#062;\n+     Sets a base quality threshold (0 &#060;= minbasq &#060;= 10, default 0).\n+     K-mer words of the read with nucleotides that have a base quality below\n+     this threshold are not looked up in the hash index.\n+\n+  -r &#060;seed INT&#062;\n+     If &#060;seed&#062; &#062;= 0 report an alignment selected at random where there are\n+     multiple mappings with the same best alignment score. With &#060;seed&#062; = 0\n+     (default) a seed is derived from the current calendar time. If &#060;seed&#062;\n+     &#060; 0 reads with multiple best mappings are reported as 'not mapped'.\n+\n+  -S &#060;scorspec STR&#062;\n+     Specify alignment penalty scores for a match or mismatch (substitution),\n+     or for opening or extending a gap. &#060;scorspec&#062; is a comma speparated\n+     list of integer assigments to one or more of the following variables:\n+     match, subst, gapopen, gapext, i.e. 'gapopen=-5,gapext=-4' (no spaces\n+     allowed in &#060;scorespec&#062;). Default:'match=1,subst=-2,gapopen=-4,gapext=-3'\n+\n+  -w     Smith-Waterman scores are complexity weighted.\n+\n+  -x     This flag triggers a more exhaustive search for alignments at the cost\n+     of speed. In paired-end mode each mate is mapped independently.(By\n+     default the mate with fewer hits in the hash index is mapped first and\n+     the vicinity is searched for mappings of its mate.)\n+\n+  -y &#060;minid FLT&#062;\n+     Sets an identity threshold for a mapping to be reported (default: 0).\n+     &#060;minid&#062; specifies the number of exactly matching nucleotides either as\n+     a positive integer or as a fraction of the read length (&#060;= 1.0).\n+    </help>\n+</tool>\n"
b
diff -r 000000000000 -r 77cc50d982c0 test-data/ecoli_1K_1.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecoli_1K_1.fq Tue Sep 19 16:40:17 2017 -0400
b
b'@@ -0,0 +1,8216 @@\n+@EAS20_8_6_1_9_1972/1 trim=6\n+ACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCG\n++\n+HHHHHHGHHHHFHHGGHHFHHHHHFHHFHFHHHHHFHHHHHFHHHHHHHHFHHFHFDHHGG@BGGHCDHE:;3)7.A973A:AA5>AD9G=D<D\n+@EAS20_8_6_1_163_1521/1\n+GCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTG\n++\n+HGHHIHHHDHHHHHIHHIHHHHHHHHHBHHHHHFHCFHHHHHHHGHHHHHEHHFHHHGHHIHHHGHGHHHIHFHHHHHGH?5<<;BD>6>?BGEHHGHFG\n+@EAS20_8_6_1_178_1948/1\n+ATTCGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCATAATGGCGATCGA\n++\n+GHHHHHHHHHGGHHHHHHGHHHHHHHEHHHHHHHHHHHHHFHHHEHHHHHHHFHFHIHHIHHHIHHHGHIIGGHBGGGHHFFGBHIFFGCIGGEFDG@AG\n+@EAS20_8_6_1_318_1522/1 trim=5\n+AAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCAC\n++\n+HHHHHHHHHHIHHHHHHHHHHHHHHHHHGEHHHHHEHHHHHHIHHHHGHHEHHGHBHDDEHHEGH?HEGGGIHDIGFFFEHE;EE;=AFE;AB/B\n+@EAS20_8_6_1_348_1372/1\n+AAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAAC\n++\n+HHHHHHHHHHHHHHHHHHHEHHGHHIHHHDHHHHHHHHHHFHHHGHHFHHH@HHHFHHHEHH<HH?@D>DHDEBEEGDFDCHECGB:?43CCCFG?90<6\n+@EAS20_8_6_1_389_646/1 trim=2\n+ACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAAC\n++\n+HBHGGHHIGHHHGDHHGHHDHHHGDHFGGHHHHHHHHHHHHIHHGFGHHHHHGHGGHGGDHHEFHDDHGGGHHH=HDDB9FHGGEIEHICEB?DBBAE\n+@EAS20_8_6_1_513_951/1\n+TTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGG\n++\n+HHHHHDHHHHHHIHGHGIEEDFHBDDHGGGBC?C?IHD>GFHGHG?DDGEB?;?,DCF=D??F=BDBGE=DDGDG.>76?>EB>54+?;=5;BEBDAGGA\n+@EAS20_8_6_1_530_1080/1 trim=1\n+AAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAG\n++\n+EHHHHHHGHHFEHHHHHGHHHHHHGHH=BHHHEHHHHHHHEHFEEHHFDHHHHHHFGHHHHFHFHHEHFHFEHEEHB>EG=F?E=>FHE@CH5HB:81@\n+@EAS20_8_6_1_546_406/1\n+AAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCAT\n++\n+HHHHHHGGGHHHHHHHHHHHHHIHHHHHHHHHHHHIFHHHHHHHHHHHIFHHHHHHHHHHHHHGHHHHFHHHHFHHHHEHHDH@8GFHHGHEFBHHGG5G\n+@EAS20_8_6_1_553_1132/1 correct\n+CGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAG\n++\n+HHHHHIHHHHHHGHHHHHHHHHDHHHHHHHHHHHIGHH@HHHHHGHHFHDHHHHHHHHGGGHH=HDHHHFHHHGHHHFHH55445#A@@:55555EGGDE\n+@EAS20_8_6_1_594_142/1 trim=11\n+GGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGGCTTTTTTCTGTGTTTCCTGTACGCGTCAGCCCG\n++\n+HIHHHHHHHHHHHHHHHHHHHHHIHHFHDHHGEBEEEHHGHIIHHFHHEG@FAFHD;DEFHHH@E.:82=??:=:F5?E6;4<:@B?AE\n+@EAS20_8_6_1_641_1277/1 trim=50\n+GGGCTTTTTTCTGTGTTTCCTGTACGCGTCAGCCCGCACCGTTACCTGTG\n++\n+HHE=GHHHHHECGHHHG@FDHEFF7@@CAA?FA>FA?9;;>@;5=1AA+D\n+@EAS20_8_6_1_660_979/1 trim=5\n+AGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAAT\n++\n+HHEGHHHHHHHHHHHHHHHHGFFHHHFEEAHEHHGHHEHDFGHIHHHGGD<9F:FHFHHBC4EEEDEBGHHGE@BCBEDGGFFGACFG=4C39:B\n+@EAS20_8_6_1_763_241/1 trim=42\n+TTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGGCTTT\n++\n+GHHHIG@HHHHHHGHGIHHHHHHHEDDFEHCBGGHHHHH@HFFGGAGE;B+>3223;D\n+@EAS20_8_6_1_789_699/1\n+GCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGT\n++\n+HGHHHHHHHHGHF@GHHGHHFHF@FD??F>CDHHFHHHHHBEFHHAHCHHHEHH@HHHDFHH;HGHGFHHF>1DDFF?FFEHFEHF0AE.2061585=@/\n+@EAS20_8_6_1_802_355/1 correct\n+AGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGAT\n++\n+HIHHHHHHIGHDHHHHH#HHFDFBB?DDA>EFGDFHHFHFHHHHHHHEGFHHHFGHBH=;FF@;FEGHHHHHFHCHHEHDFHEHHFHDH=?HEFEHEHAH\n+@EAS20_8_6_1_807_1707/1 trim=15\n+TCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGCGG\n++\n+FHCEHGFHHHHGEEHHHHGHHFFHFHGHHHGHGFHH=HFCHHHHHEHEHHDFHHFFHHHHAHHBHA4>C6?@=8>5FBBFE?EEB\n+@EAS20_8_6_1_918_1054/1 correct trim=10\n+AACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCA\n++\n+HHGHHHHHHHHHHHHHHHHHHHHEHHFHHHHGHHHEHHHFIHGHFHHHFHGGFHC>D1D<AFD:CGGGGAB=FCECE23B/#8>=DD:EC\n+@EAS20_8'..b'<@>@8BG9G9\n+@EAS20_8_6_100_346_1515/1\n+TAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGC\n++\n+FGDHEHHHHHHGHHHHHHHHBG?GEHHDDGHHHDH@GEEH?CHHFF?FDFEEFHEEFHGHHFFHEIHHGBGGEGDIGFEDEHBEHGEAFHGFH@HGGA?H\n+@EAS20_8_6_100_434_840/1 trim=47\n+ACCTGCCATCAGCACCATGTGATCAGCCGGAATGCGGCTTGCCGCAATACGGC\n++\n+HGHHHGHHHHHHGGHHHGHHHEGFHHHFHHDGHHEHGGHE:5=>9-<<.-BGB\n+@EAS20_8_6_100_451_465/1 correct\n+CGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACC\n++\n+FHHEHHHGHHHHHGFDHHBHBBDDDHEHHHGHIHHDHHEHHHHHEFHHHHHHI@HE=@FBFFCC#HHDHHEHHHH@HGADFF,=FB@F?F?FB4F?F@BB\n+@EAS20_8_6_100_452_1846/1 correct\n+CGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTA\n++\n+HFHHEHHHGHEEFEHHHHHHHGFHHHHHDHFGHFHGHHGH@B5>BAEHEHFF#?;-54457HHFAFACEFFCFAFGGGECGEEG#@@@AAFGGCDBHEE=\n+@EAS20_8_6_100_723_1564/1\n+TCTAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCAT\n++\n+HHHHHHGHGHHHHHHHHDHHHGGHGFFHD=DFHHHFHHGGHHFHHHHHHHHDHGGHHHCHEHHGHGFHHHHGEG3H@HEDHHGHHFGEGGGGFFFGEGAG\n+@EAS20_8_6_100_776_1749/1 trim=5\n+GGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAAT\n++\n+H,/E8;GH88;?8AA=CFHFCF/F7FE0EA@1.=;ACFF>C=HFFFD=D=HFBFH1FFFG>@CD;GGFGBGAGGF8HHHHHHEHIHEH8H??,<A\n+@EAS20_8_6_100_779_871/1 trim=20\n+GTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAA\n++\n+GHHGGHGHHHHHHHCHHHFHHHFHHHHDFHHEHGHHEDHF?GG@::FGCDCB6E@F6:F<BFAC?EDF-:FEHFB:226A\n+@EAS20_8_6_100_816_1759/1 correct trim=47\n+AAAAAGCCCGCACTGTCAGGTGCGGGCTTTTTTCTGTGTTTCCTGTACGCGTC\n++\n+#GGHHHBHCGHHHHB@>DA@0#?8>/:/<=FFF/@,3/444(555<5/?3A,A\n+@EAS20_8_6_100_993_1119/1\n+GTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACC\n++\n+?@FDBEFGF8BFGFGHHHHEHHHHEHBFHHHHGGHHH@HGFHHHDH5HB?GHDGDFDHEHIFHFHFFHFE?HDFCFGEHEGBE=HHGGGGGHFHHGGIEG\n+@EAS20_8_6_100_1153_966/1\n+AGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHCHHHHGBFHHDHDHHHHFCHBHEGEHEHGGEHH@GEG8FFBEHFF:FBFDA+DFC;C3\n+@EAS20_8_6_100_1183_226/1 correct trim=26\n+TTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCG\n++\n+CFCCF?DCCDHEGHFHHHHFFFHD@HGEHHHGHHHGHHFHHHHGEEH:H>HHHDCFF#EADA366=A0A+8<AA\n+@EAS20_8_6_100_1263_900/1 correct trim=1\n+AGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCAC\n++\n+HGHHHHHHIHHHIHFBD?BCHHHHHDGHHHHEHHFCHFHHBHEHHHFHHDHHFEHHHGH=HEFFFHH@CFHBHEHHBGHGBCDBGFDH<EAF#9F#?@A\n+@EAS20_8_6_100_1277_357/1 trim=5\n+ACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGCGGGGGCAG\n++\n+DEHHHHIEHHHHHHHHGIGHHHHHHHGHHHHEFEHHHHHHHHFEHHHHHHHHH@CGHGCHFHHE@FB<=F=FFFFFFDG;BFDFFGFEG@9B24C\n+@EAS20_8_6_100_1337_252/1 correct trim=16\n+GCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGG\n++\n+GBHHHHHHGGHHBHEG@BEHHDHAHGBHGHDGDDEH#EBEDBFDHHC2EDF9?@EEHHE?CCH=EHEGAGGGEE4AGA6E1G?F\n+@EAS20_8_6_100_1432_436/1 trim=3\n+GTAGATTCGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCATAATGG\n++\n+HHHHHIHHGGHDGHHHHHEGHHHGHHDHHHEHHGHHHHGHHFHGDGHHDHECHDEGGFGGFGFFBGGEGEG?GBGHEHB9EGEDGDAEDEDE?DFBF\n+@EAS20_8_6_100_1457_1209/1 correct trim=7\n+TTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAAT\n++\n+HGHFFDBCBD#BBFFEDDFCHGHHEHCHHHEHHHHHHEAHHHGHBDGHEHEEGHGEE@GGGGFG?5C;A5:0;A4988-/>8@5>;<-37<?B\n+@EAS20_8_6_100_1609_245/1\n+GGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACT\n++\n+BCBBDFHHHHHHEHHHHGHHHHHHDHHHHHHHGHHHGIHHHHHEHIDHGGIHHHFGHFHHGHHBGFFHHHHFHHFHHGHHIHHEHGFF=HGFHHBFFGBH\n+@EAS20_8_6_100_1637_1332/1\n+TGGTGCTAATGCGTTTCATGGATGTTGTGTACTCTGTAATTTTTATCTGTCTGTGCGCTATGCCTATATTGGTTAAAGTATTTAGTGACCTAAGTCAATA\n++\n+FHFHHFGGHGHFHGHHGHHHFEHHHGFFDFHFHHHGEDCGHHHHHG9HDGHFHGHHHEHEFGF=GAHEHHFEGFFGEHFHHHHFHHEBE?HHFHEGFDHH\n'
b
diff -r 000000000000 -r 77cc50d982c0 test-data/ecoli_1K_2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ecoli_1K_2.fq Tue Sep 19 16:40:17 2017 -0400
b
b'@@ -0,0 +1,8216 @@\n+@EAS20_8_6_1_9_1972/2 correct\n+GGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACT\n++\n+HFHHHGHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHGHHGHEHHHHHHEH;G?F<F?GDFF/EEFBD:DCHHH7A@?EEH@HH96:4F@#76=C@@\n+@EAS20_8_6_1_163_1521/2\n+GGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGTAACG\n++\n+HHGGFHHGHHHHHHHGFHHHHHHHCGHFHFHFHHHHHHGHHGHHDHHIEGEDFEHHCHHHHGCCGF>GFEFEGGFGGEIG1ACBCF?CD1?CADCD.DFB\n+@EAS20_8_6_1_178_1948/2\n+ATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTG\n++\n+HGHIHHHIHGGHHHHGHHFHHFDHHHHHGHHBHHHHHHGGGHHHHHHHHHFHHHHGHHFHFEDHGDHH>HGEFCHHCHHH=GGHBEGFFHEGGD@HHEEH\n+@EAS20_8_6_1_318_1522/2\n+AGAATATCGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTTTGGTCGAAA\n++\n+IGHFHHHDEHGHHEHHHEGHDHHHGHHHHHHEHHHHHDHHHHHGHHHHEGFAEHDGHFHHFDHFGHHHIGHIEGHHGGHFGFAHGHGFEHDHEEEB7<3<\n+@EAS20_8_6_1_348_1372/2\n+GGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGC\n++\n+HHHHHH@HGHHEHGHGHHHHDHHHHFHEHDDHHDHHHEEHHDHHHBFHCEGEHGGGDHHE@EFDAHHGHEDFGG:FDB<>=BDG?D=3BB>G?GGEGGD;\n+@EAS20_8_6_1_389_646/2 trim=17\n+TGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTC\n++\n+HGGGDHHHAHGHHHHGHIHHEHHHGHFHHHHHFHHEG=GHEEFEHEHHEEEDGHGHFHDFFFADDHHF:>0&D0A4?,?AE=?\n+@EAS20_8_6_1_513_951/2 correct\n+ATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCG\n++\n+GHFHGHDHGHHHHHHFHHHEFHHHHHGHFHBHEHHHFDHHGGHHBEHHGHHHFFFBHHHHHGEGDHFIHHHHEEC?GEGDFE;<E+@5@>?E;=?#A)?=\n+@EAS20_8_6_1_530_1080/2\n+GAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTCA\n++\n+D?D?DG;FAFFGDGFDF?F:>=DD>GGGGGDAD:D+DD>DDFAFFDGFG4FFFFC<FF>BGCG/CFF59AFBD6DG@BA1.53;-:9:FG::F-:><>96\n+@EAS20_8_6_1_546_406/2\n+CGGGTGGACTCAGCAATATCGACGGTAGATTCGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTT\n++\n+HHHHEHHHHHHHIHHFHHHHHHHHHGHHHIHHHHHFGHHGHHHHHBHHGBHHEHGHCGGGGHAGHEGGGGHEDECGGFE8GGE=EGGC>FGE@DDECGDD\n+@EAS20_8_6_1_553_1132/2 trim=6\n+GGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCC\n++\n+HHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHEGHHHHHABGEEHHDHAGHDHGGHFFFGH@HDGGHGHEEFIIBFFFIEGD@EFG<E@EC<6B\n+@EAS20_8_6_1_594_142/2\n+AACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACC\n++\n+GGHHHHHHHHHFGHHHHHGHHHDGDHHHHHHHFH?EHHHHGDGEHHHHDHHHIHHGIHHEFFHHHHFGFHGEHCGGGEGA;HFH@DEFEBEEAHE??=?C\n+@EAS20_8_6_1_641_1277/2\n+GGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTA\n++\n+FFE?FEBFG?EHFHDHDFHDEHGAEEFGFHHH?GH;HAGGFF7AEHCCBH?0FG=2FAB4EHGB>BC0<?EE=9C=E;F;:;AAAA+,=,6E@2AC3?<>\n+@EAS20_8_6_1_660_979/2\n+CCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAA\n++\n+HDHHEFHH@FHHHEHHGGGGEEEHFFHHFGHHHHHGGHHDBFBF;FFFGFDFGGHHHHG<FHCHHA9=B?=G@@?EHED9F8DHEDD3EBC&GD3)-000\n+@EAS20_8_6_1_763_241/2\n+TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGT\n++\n+HHFDFGHHHHHEHHCHHFGHBEEAGHBCGEGHHEHDHEHH=HHHHHHHHGHHDBFGHHDGBIHHHHCHEHBE>HHGHBHHFHEGEHGHBDEHEH=F:E??\n+@EAS20_8_6_1_789_699/2 correct trim=20\n+TGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCC\n++\n+H=BHHFAECGAHHHF<AF@FFEEEEHHHHFCHHHHHGHH=FEHHHEHH4E1DAA=FEG?AHDEH#*F;1/AA7(D<?)FF\n+@EAS20_8_6_1_802_355/2 correct\n+CTGTACGCGTCAGCCCGCACCGTTACCTGTGGTAATGGTGATGGTGGTGGTAATGGTGGTGCTAATGCGTTTCATGGATGTTGTGTACTCTGTAATTTTT\n++\n+HHHHHHHHHHHGGGGHHHHIHGGIHHHHGHHEAEBFFBCF=GGD?D@2C;?<6?BFBFF>BCCE?BBDGDBD>2HF?8?C?:19?0&5-#=:/;6DHGD>\n+@EAS20_8_6_1_807_1707/2 trim=1\n+GAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGG\n++\n+FCFF2FBBFBEHGHH?D=AAFDGGCCHHHHBHEEHDEFAFFGGGDGGABIGECHEBBBHFHHHDDFEGGEEFD=1=BDDDCCGEEDDJCBBDDE;BEAG\n+@EAS20_8_6_1_918_1054/2 correct\n+GCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGT'..b'6_100_346_1515/2\n+TGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTC\n++\n+AADA@BBFBDHCGGBHHHGHH=HEHFHHH@HFHHFFDHFE;HHEHGF<CGHHH@FGGG1F<B9BDHHHH=H?HE6GFADFEBHBDFD/BGF1F??HGHGF\n+@EAS20_8_6_100_434_840/2\n+AGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTAC\n++\n+HHHGHHFHGGHHEFHHH>FHHHHHFHFHHEHHHFFHHHHDHHGHHBAHHHFHHEHFHHGBIHHEFIFGEDGGC;FE80>ABC4CAEGG;A182;916>98\n+@EAS20_8_6_100_451_465/2 correct trim=22\n+GTTGCGTCCAAGCACCACCAGTTCGCCTTTTTCATTACCGGCGGTGAAACCTGCCATCAGCACCATGTGATCAGCCGG\n++\n+HIHHDHHHHGHIGHHHHGHHHFHHIHIHHHHHIHHHHHGIH@HH>A;AD95#+43=?C:35:6865/9;13=<:?B?C\n+@EAS20_8_6_100_452_1846/2 correct\n+CTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGA\n++\n+HHHHHGHHHHGHHHFHHHHGHGHDHHFHBEHHHFHFGHHGHHHDHE#HEDF?CF?DFD:E252+7CBF?14:*:AA;:C9;9;+ABAD8#64<::;3+<7\n+@EAS20_8_6_100_723_1564/2 correct trim=2\n+TGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTG\n++\n+HHHEHEHEH?HHEEHEEGHHBEFEEHHHFHHHHHHFHEHFEHHFHGHHGHED/EHDHEH@A88D@C(E6BEEHD@5?CE:AG;D@C;;2@>#8.95?C\n+@EAS20_8_6_100_776_1749/2\n+TCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATC\n++\n+F3G30CEG?GDD4D=H<8BFEF8HDGHHH1HHFFEHDHB1;5>;<8EDC8??@ADEHH@HEEEHFHHHHHHEHHHG:@FFFGA6AEHH@=EHH;HFC/FF\n+@EAS20_8_6_100_779_871/2\n+AAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAG\n++\n+FGEFCEHFFFBHHFHHHHEEHHHBCFAFEHGHGHH7B@:EDFHDE5CAABBHCB7@8?A8?FB3BBADBDG>;;CF=FGGD52B0?A2A?;;=;BA5DAE\n+@EAS20_8_6_100_816_1759/2\n+AAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAAC\n++\n+63;=>>3>>?DECADBBDCG?@7?5DCEEEGGGECGGGCGGGDCGGFCG6FGBCGGGGG>DGCGGBEGG>4E7?GFGG<GGGGEG;,GBADD8ADG>2EF\n+@EAS20_8_6_100_993_1119/2 trim=60\n+TGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCC\n++\n+B>BBB=F:FGFHFBHHHHHHHDHDGHEH=HHHHGE>8DC=\n+@EAS20_8_6_100_1153_966/2\n+TCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATCGA\n++\n+HHGHHHHHHGHHHHHGHHHHHHHHHHGHHHHHHHHHGHGHHHHH;HFDEHFDHDHHHHHGDHHHHDGHFGBGHHCGGHFH>ECGGHGECFFDFFD?DDDB\n+@EAS20_8_6_100_1183_226/2 correct trim=40\n+AATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCAC\n++\n+DDBBD@EEEGDHHEHBCBB0=GCGC4?.6=D8CF89:DFBGG5==*83<#?/:;+E@ABB\n+@EAS20_8_6_100_1263_900/2\n+AAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTG\n++\n+HHHEHHHEGIHHHGHGGHHHHHHHHHHHHHH=HFHHHHHFHHHGEHHFHHHGEBHHHBCHHDEGHGGF@G@HGEE=F;C9H39H=FEHFHHEFH?=AEFE\n+@EAS20_8_6_100_1277_357/2 trim=5\n+GCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGG\n++\n+FFFHFHHHHHHHHGHHDHHBBHHHHHHEEEH<GHGH@FHHHHGGHGHHHFGHHGGHGGHHHFBEFFEHHHECGIIFHEGEEDFEEH//EBD4=EF\n+@EAS20_8_6_100_1337_252/2 correct\n+CATAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTT\n++\n+HHHHBHEECG=FEEFH.HHH=GEGE@HEF<BFHHBHHAHHBBBB>DGHH8HDF/=F=FF==;/FFCGGHDGBHG-B<@BFE=:FE5=E98E=E-H=@C#C\n+@EAS20_8_6_100_1432_436/2\n+CAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGC\n++\n+HHHHHFHHGHHHHHGGHHHHGHHHHHHHHGHHHGHHHGGGEEEHHHHHHFHEDGGGHHFHHHDGHBFGFF>BHH8FBF0F@>DFGHHBBEH>FBD@DHEG\n+@EAS20_8_6_100_1457_1209/2 correct trim=11\n+GCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGG\n++\n+HHHHBGHHHFH?HHHHCCFDB=?D=HHEEAEFEHGEB=HHC68C5??B==B=F<;C7=ECH=HE:?CE>@?<EB1.-87<<#3C9@>??\n+@EAS20_8_6_100_1609_245/2 correct trim=16\n+TCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGGCTTTTTT\n++\n+HHEHHHGHHGHHHHDIHHHHHHHHHFHAFHFHHEIFDHFH8HHHFHHHGHHHEEB<8FFFCG>FE==9D9#0:?45/<#;B8=D\n+@EAS20_8_6_100_1637_1332/2\n+ATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTT\n++\n+HHGEHGHHHHGHIGHAHEHEHEIHGGHHHHHHHGGHHHHFHEGDGCCCHHHGHHDHHIHFEEHHHHHGHIHHHFHHIEIHHHHFAEEEHHIEHDHFFHFH\n'
b
diff -r 000000000000 -r 77cc50d982c0 test-data/output.sma
b
Binary file test-data/output.sma has changed
b
diff -r 000000000000 -r 77cc50d982c0 test-data/output.smi
b
Binary file test-data/output.smi has changed