# HG changeset patch # User Lance Parsons # Date 1348181978 14400 # Node ID e6f1c31279dbcc884f902fb7fafc8b9f2535b4bd Initial version with fastq-join diff -r 000000000000 -r e6f1c31279db fastq-join.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq-join.xml Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,65 @@ + + - Joins two paired-end reads on the overlapping ends + + ea-utils + + + fastq-join + -v '$splitChar' + -p $pctMaxDiff + -m $minOverlap + #if $stitchLengthReport: + -r $outputStitchLengthReport + #end if + $read1 + $read2 + -o $outputUnmatched1 -o $outputUnmatched2 -o $outputJoined + + + + + + + + + + + + + + + + stitchLengthReport + + + + + + + + + + + + + + + + + + +Overview +-------- +fastq-join joins two paired-end reads on the overlapping ends. + +Split read ids character: Verifies that the probe IDs in the 2 files match up to char C. Use ' ' for Illumina reads. + +Maximum difference is the maximum allowed percentage of bases that differ in the matching region. + +Minimum overlap is the minimum number of bases that must overlap (with no more than the maximum difference) for reads to be joined. + +Verbose stitch length report is a report for each joined pair of reads showing how large the overlapping section was. + +This tool uses sqr(distance)/len as its anchored alignment quality algorithm. It's a good measure of anchored alignment quality, akin to squared-deviation for means. This tool uses the fastq-join program that is part of the ea-utils suite. See http://code.google.com/p/ea-utils/wiki/FastqJoin for details. 
+ + diff -r 000000000000 -r e6f1c31279db test-data/test_read1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_read1.fastq Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,40 @@ +@JLK5VL1:222:D1888ACXX:1:1101:1656:2143 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +?+:BDDDAD8BDD?@4+5;6?;;@A;AABA>A@>AAAD;A>A>5=5>>>BBAA###### +@JLK5VL1:222:D1888ACXX:1:1101:1613:2167 1:N:0: +GTGATAGAGATACTGAGCACAGAGCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@?BDFFFFHBHHHJJJGFEHHEIHEGHJJJJDI@DHIIHGGGGGEHIHGIIIIJJJGIB?AACBEDDFCDCCCCCEDC>@CD>CC@BA>CDEECD=BDDB> +@JLK5VL1:222:D1888ACXX:1:1101:1927:2121 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@B@DFFFFHHHHHJJIHJIJJJJJJJJJJJJIJHJJIJIJIHHGIJIIJGIIJJJIIII?EEEDFFFFEEEEEDECDDDEDDDDCCBACDEDECC@BB9CCCCCACDDEEDCA9>BBDC +@JLK5VL1:222:D1888ACXX:1:1101:1763:2172 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAAGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@?@DDDBDHDFDHBFB3CCGFFHDHHIIIIIIG@<>BD? 
+@JLK5VL1:222:D1888ACXX:1:1101:2169:2161 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTCGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@?BFFFFFHHHHHJJJGIGIJIIJJJIGJJJJJJIIIIHGHIIGIJGIIIJJIJIIJJJEHDBDCDEEEEDDDDD@CCCDAA@CA@A:@CADD>C9<@B9C +@JLK5VL1:222:D1888ACXX:1:1101:2236:2214 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@=@DFDFFHAHDHGFEE@FFHGGEDEGHGIJ@GFGHAGFIIGBDGHIIJIGJJJJJIIIACEBDEFDFDEEEEDEDDCCDCD;CDCB@CF@DD@C?B<@DC +@JLK5VL1:222:D1888ACXX:1:1101:2090:2240 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@:?DDDDDFABCCDCFFCA:9BBBC diff -r 000000000000 -r e6f1c31279db test-data/test_read3.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_read3.fastq Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,40 @@ +@JLK5VL1:222:D1888ACXX:1:1101:1656:2143 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +BB8=4ADDHHHHACG(;(;@C@C:<:>>@>>;B>B(44?>C@B9<<8 +@JLK5VL1:222:D1888ACXX:1:1101:1613:2167 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +@?=DDDFFDFHHHIGGHHIGGFEGIJJFAFEGGGH@AFFHGIIIGEFHBF>@FCGAE@DD=D'9@DCC@;C@C@CDDCD>CCD@>>:@@C@CDDACA<28? 
+@JLK5VL1:222:D1888ACXX:1:1101:1927:2121 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +CCCFFFFFHHHHHJIIJJJJJJIJJJJIJJJJJJJIJJJJJJJJJJJJJJIHIJJIJEEHHF>BBDEFEDDDDDDDCDDDDCDDDEEEEDDDDDDDBDBDD +@JLK5VL1:222:D1888ACXX:1:1101:2000:2166 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +B@@DDFFFGHHHDIFHIJIIIJJJJIJJJJJIJJIIJIJIGIIJGIJJJJDFGGGII:DHFF3>C>CCCC(5(;>CDECCAC>@AB08AB +@JLK5VL1:222:D1888ACXX:1:1101:1832:2198 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATGCTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +C@@FFFFFHHHHHJJJJJIJIJJJJIIJJIJIIFIGIJGHIDGIIIGIJJIHGHIIJGIIHF<<@DDDDEEDDCCCDDDCCCDDDEDDEDDCCDDCBB?AB +@JLK5VL1:222:D1888ACXX:1:1101:2169:2161 3:N:0: +TTTGCCCTATTTATAGGACCCTAGAAAGAAAAAACCTCAAATAAAATAAAAGTGATTGGCGGGATATACTTGTGAACAATGTGTATTTACTTACCACCACC ++ +##################################################################################################### +@JLK5VL1:222:D1888ACXX:1:1101:2236:2214 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +@@@DDFFFHHHHFEEGIEHIJJGGFGIGIHCHJJJJJJIJIJGIGGIJIJGCDGEGG>EGHF=ABC@BDDCDDA@CCCCDCDE@CCD>CC@CDDDDBD?<< +@JLK5VL1:222:D1888ACXX:1:1101:2090:2240 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGATAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +CC@FFFFFGFHDDHH>@FHHCHIIGIIEGGHIJIIBEEHIGGGGJIJJJJGBFGIGGCAHEA,9?B>B@@C>@@ACACCCC@C@@CDCDC@>CCCDD@(8? 
+@JLK5VL1:222:D1888ACXX:1:1101:2952:2161 3:N:0: +TGGATCCTATTAATAGGACCCTAGAAAGAAAAAACGCCAATCACAATAAAGTTGATTGGCGGTATATACTTGTGAACAATGTGTATTCACTAACAACGTCA ++ +CCCFFFFFHHHGHJJJJJIJJJJJIJJJHIJJIJJIIGIIIJEIIJIIJIG@FHIJJJIJHFCCDCCDDDDDDCDCCDDEACA@CDDDDDABB diff -r 000000000000 -r e6f1c31279db test-data/testout.join.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testout.join.fastq Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,36 @@ +@JLK5VL1:222:D1888ACXX:1:1101:1656:2143 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTTTCTTTCTAGGGTCCTATTAATAGGATCCA ++ +?+:BDDDAD8BDD?@4+?;;@B>BABA@A@>AACDCA>A>5>@CC>BBDEA;7@;@@DFCIGGCCABB?3GED@0FDC;CEEACIIIIIGGIHFFBHFEGIIIGHFFA@HGGGEFAFJJIGEFGGIHHGGIHHHFDFFDDD=?@ +@JLK5VL1:222:D1888ACXX:1:1101:1927:2121 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTTTCTTTCTAGGGTCCTATTAATAGGATCCA ++ +@B@DFFFFHHHHHJJIHJIJJJJJJJJJJJJIJHJJIJIJIHHGIJIIJGIIJJJIIIIDEEEDFFFFEEEEEDEDDDDEDDDDDDEFEDEDEFHHEEJIJJIHIJJJJJJJJJJJJJJIJJJJJJJIJJJJIJJJJJJIIJHHHHHFFFFFCCC +@JLK5VL1:222:D1888ACXX:1:1101:2000:2166 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACGTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTTTCTTTCTAGGGTCCTATTAATAGGATCCA ++ +B?@DFFFFHHHHHGIIJIIJJJJJJIJJJJJIIHIJJIJJIIGGJJGHGHIGGIIJHIGDEEEDFFEEEEFEEDDDDDDDCCCCDDEDDEEEDFFHDBIIGGGFDJJJJIGJIIGIJIJIIJJIJJJJJIJJJJIIIJIHFIDHHHGFFFDD@@B +@JLK5VL1:222:D1888ACXX:1:1101:1763:2172 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAAGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTTTCTTTCTAGGGTCCTATTAATAGGATCCA ++ +@?@DDDBDHDFDHBFB3CCGFFHDHHIIIIIIG@GGEGDCGJIJIGGIGJIJIJJJJJJHCHIGIGFGGJJIHEIGEEFHHHHFFFDD@@@ +@JLK5VL1:222:D1888ACXX:1:1101:2090:2240 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTATCTTTCTAGGGTCCTATTAATAGGATCCA 
++ +@:?DDDDDFHHDDHFGFFFFF@CC +@JLK5VL1:222:D1888ACXX:1:1101:2952:2161 +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTTGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAATCAACTTTATTGTGATTGGCGTTTTTTCTTTCTAGGGTCCTATTAATAGGATCCA ++ +@@@DFFFFHFHHGIJIJJJJJIJJJJIIIJJIIJJIJIEHIHIEHHIGJEHIIJIJDDGDEDCFFFDFEDDEEEDDDDDDDDDCCACCCDCFFFHJIJJJIHF@GIJIIJIIEJIIIGIIJJIJJIHJJJIJJJJJIJJJJJHGHHHFFFFFCCC diff -r 000000000000 -r e6f1c31279db test-data/testout.un1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testout.un1.fastq Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,4 @@ +@JLK5VL1:222:D1888ACXX:1:1101:2169:2161 1:N:0: +GTGATAGAGATACTGAGCACAGACCCTTATTAAGCCGAGGGTCACCTAGCCAACTGACGTCGTTAGTGAATACACATTGTTCACAAGTATATACCGCCAAT ++ +@?BFFFFFHHHHHJJJGIGIJIIJJJIGJJJJJJIIIIHGHIIGIJGIIIJJIJIIJJJEHDBDCDEEEEDDDDD@CCCDAA@CA@A:@CADD>C9<@B9C diff -r 000000000000 -r e6f1c31279db test-data/testout.un2.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testout.un2.fastq Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,4 @@ +@JLK5VL1:222:D1888ACXX:1:1101:2169:2161 3:N:0: +TTTGCCCTATTTATAGGACCCTAGAAAGAAAAAACCTCAAATAAAATAAAAGTGATTGGCGGGATATACTTGTGAACAATGTGTATTTACTTACCACCACC ++ +##################################################################################################### diff -r 000000000000 -r e6f1c31279db tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Sep 20 18:59:38 2012 -0400 @@ -0,0 +1,18 @@ + + + + + + http://ea-utils.googlecode.com/files/ea-utils.1.1.2-469.tar.gz + + sed -i.bak 's/sam-stats varcall$//' Makefile + PREFIX=$INSTALL_DIR make install + + $INSTALL_DIR/bin + + + + + + +