# HG changeset patch
# User lparsons
# Date 1353445280 18000
# Node ID f0d19a935325073efc5e624c1c7068113bb7f3c7
# Parent 71eaf2e85ae721e14e69e249532d11f7e6e414af
Uploaded
diff -r 71eaf2e85ae7 -r f0d19a935325 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Tue Nov 20 16:01:20 2012 -0500
@@ -0,0 +1,19 @@
+== ea-utils Galaxy Wrapper ==
+
+This is a Galaxy wrapper for some ea-utils tools, fastq-join and sam-stats.
+
+** Installation **
+
+Installation from a tool shed provides the necessary tool dependencies.
+
+Otherwise, make sure the fastq-join and sam-stats executables are on your PATH.
+Move the test data files to the test-data directory in your Galaxy root.
+Move the XML files to a subdirectory of your tools directory and add entries to tool_conf.xml that point to them.
+Restart the Galaxy server.
+
+** Attribution **
+
+The ea-utils package and associated documentation can be found at: http://code.google.com/p/ea-utils/
+
+The Galaxy wrapper code was written by Lance Parsons (lparsons@princeton.edu), Lewis-Sigler Institute for Integrative Genomics, Princeton University.
+The code is housed on BitBucket at: https://bitbucket.org/lance_parsons/ea_utils_galaxy_wrapper
diff -r 71eaf2e85ae7 -r f0d19a935325 sam-stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam-stats.xml Tue Nov 20 16:01:20 2012 -0500
@@ -0,0 +1,83 @@
+
+ - Compute statistics from SAM or BAM files
+
+ ea-utils
+
+
+ sam-stats
+ $trackMultAlign
+ $reportAllChr
+ #if $rnaSeqStats:
+ -R $rnaSeqStatsFile
+ #end if
+ #if $input.extension == "bam":
+ -B
+ #end if
+ -S $histBinSize
+ $input
+ > $samStats
+
+
+
+
+
+
+
+
+
+
+
+
+ rnaSeqStats
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Overview
+--------
+sam-stats computes various statistics on SAM/BAM alignment files.
+
+Complete Stats::
+
+ <STATS> : mean, max, stdev, median, Q1 (25th percentile), Q3 (75th percentile)
+ reads : # of entries in the sam file, might not be # reads
+ phred : phred scale used
+ bsize : # reads used for qual stats
+ mapped reads : number of aligned reads (unique probe id sequences)
+ mapped bases : total of the lengths of the aligned reads
+ forward : number of forward-aligned reads
+ reverse : number of reverse-aligned reads
+ snp rate : mismatched bases / total bases
+ ins rate : insert bases / total bases
+ del rate : deleted bases / total bases
+ pct mismatch : percent of reads that have mismatches
+ len <STATS> : read length stats, ignored if fixed-length
+ mapq <STATS> : stats for mapping qualities
+ insert <STATS> : stats for insert sizes
+ <CHR> : percentage of mapped bases per chr, followed by a signature
+
+Subsampled stats (1M reads max)::
+
+ base qual <STATS> : stats for base qualities
+ A,T,C,G : base percentages
+
+Meaning of the per-chromosome signature:
+
+ An ASCII histogram of mapped reads by chromosome position. It is only output if the original SAM/BAM file has a header. Each character is ASCII '0' plus the log2 of the number of mapped reads at that position.
+
+See http://code.google.com/p/ea-utils/wiki/SamStatsDetails for more information on each stat, how it's calculated and what it means.
+
+This tool uses the sam-stats program that is part of the ea-utils suite. See http://code.google.com/p/ea-utils/wiki/SamStats for details.
+
+
diff -r 71eaf2e85ae7 -r f0d19a935325 test-data/test.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.sam Tue Nov 20 16:01:20 2012 -0500
@@ -0,0 +1,100 @@
+GEN-SEQ-ANA_0001:1:1:17434:14109#0/1 0 PCRPRIMER1 5 255 33M * 0 0 ATACGGCGACCACCGAGATCTACACTCTTTCCC ??>>BB>>A5AAA;;9:B;5>>?:B>BAABBBBBBBBABBB; XA:i:0 MD:Z:38 NM:i:0
+GEN-SEQ-ANA_0001:1:1:16837:9043#0/1 0 PCRPRIMER1+1 14 255 27M * 0 0 CCACCGAGATCTACACTCTTTCCCTAC ?BB?ABB?BBB<7A5?7??A?:CC>CCCCCC?@CCBB@B@BAB@ABBBBBB5:?5? XA:i:0 MD:Z:43 NM:i:0
+GEN-SEQ-ANA_0001:1:1:18498:19719#0/1 0 PCRPRIMER1+1 58 255 38M * 0 0 TAATGATACGGCGACCACCGAGATCTACACTCTTTCCC CCCCCCCCCBCCACCC@CCC@BCCCCCC@CCCCCCCCC XA:i:0 MD:Z:38 NM:i:0
+GEN-SEQ-ANA_0001:1:1:5165:8271#0/1 0 PCRPRIMER1+2 1 255 37M * 0 0 AATGATACGGCGACCACCGAGATCTACACTCTTTCCC CC=CC>CC:@CCC@C>BABAAA88?>>ABBB:AA XA:i:0 MD:Z:37 NM:i:0
+GEN-SEQ-ANA_0001:1:1:3890:20139#0/1 0 PCRPRIMER2+1 34 255 23M * 0 0 TAATGATACGGCGACCACCGAGA CCCCCC8A XA:i:0 MD:Z:48 NM:i:0
+GEN-SEQ-ANA_0001:1:1:16810:9063#0/1 0 PCRPRIMER2+1 34 255 45M * 0 0 TAATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGA CCCCCCCCBCCCBBCCCCCCCCCCCCCCBA=@CCCCBC5B@9@@@ XA:i:0 MD:Z:45 NM:i:0
+GEN-SEQ-ANA_0001:1:1:1127:14386#0/1 4 * 0 0 * * 0 0 TGAATTCATGAATGGAAAAA ?ACC<@>69@6>@BB?@:== XM:i:0
+GEN-SEQ-ANA_0001:1:1:1210:17845#0/1 4 * 0 0 * * 0 0 GAGATAATTCAAAATCTTGCCGAT >===>>@@@=<89A6>AB?>B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1227:4069#0/1 4 * 0 0 * * 0 0 TCGAGAATTCTTTATCAACCTCGTT =CCCCCBC6@BCCC=9BA7A>>BB> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1309:16895#0/1 4 * 0 0 * * 0 0 GTACAAATTCCCTCATTCTTGGATTTGTA CA>CC@BBC8C@C>=C5; XM:i:0
+GEN-SEQ-ANA_0001:1:1:1290:1939#0/1 4 * 0 0 * * 0 0 TACGTAATTCCGTGTCGGAGCCCGT ??B:6>B;>AB=>B>B:BB>?A<6? XM:i:0
+GEN-SEQ-ANA_0001:1:1:1311:12611#0/1 4 * 0 0 * * 0 0 AAATTCGTTTATTTTTGTATTT BBBBB@@@@@@@B:====:BBA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1312:13847#0/1 4 * 0 0 * * 0 0 CGATAAATTCCTTGCTTCAACGATGCA ?BB>=8CC@??CCCC@B?>CC:CCC:8 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1327:14127#0/1 4 * 0 0 * * 0 0 ATAAATTCAGAATAGTTTCACT =A:??B>=;B@A;97C>B@=6B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1328:16612#0/1 4 * 0 0 * * 0 0 GAGATAATTCTTCTCATCGG @@@8@;C@C@CC=BC=>=AA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1339:13574#0/1 4 * 0 0 * * 0 0 TTCTGTGATAATTTCAATATTGCC CB8B>=B=ABBBB:=BABBB@>=6 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1343:12008#0/1 4 * 0 0 * * 0 0 TTCGGCACTTTTCAGCACAATTTTCA CA=ACC@CC@BB=?AC??CC?CC?5CBC?8?C=?ACBC>CCBB@=CAA8: XM:i:0
+GEN-SEQ-ANA_0001:1:1:1358:3384#0/1 4 * 0 0 * * 0 0 AACAAACCATGTTTTGCCCTCAAACAG B8:BBBBB:BBB=BBBA@BB:9 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1361:15054#0/1 4 * 0 0 * * 0 0 ATGCTAATTCAGTAGCTATTGTTCTCCAAAT BCC:?CA?C?=CC?ABC@C XM:i:0
+GEN-SEQ-ANA_0001:1:1:1362:13972#0/1 4 * 0 0 * * 0 0 GTAATTCTTCGCGAGTCTTTG @>C?CA?:@??@@@@CC?C?B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1363:2154#0/1 4 * 0 0 * * 0 0 AGCTGAATTCTAGAAATGAAGAATATTTTTATTTATTTTCAG CB@C?B;CC=CAAC@@>:C?@:???CCBB>8>>>>?C>6>A? XM:i:0
+GEN-SEQ-ANA_0001:1:1:1373:2799#0/1 4 * 0 0 * * 0 0 GAGATAATTCAAAACTACTACTGTAGAGGGGTACTG ?BABACCCC@B=<=@@?8B><@B@>BAAB9<@<>@B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1374:3530#0/1 4 * 0 0 * * 0 0 GAGATAATTCTTCCTCCTTAA AAAC8A=CA>CC@CC:B7ACC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1379:7364#0/1 4 * 0 0 * * 0 0 CGCCAACCAAACCGATTGGCC =???A;CCC?CCABCB8>CCC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1382:18005#0/1 4 * 0 0 * * 0 0 GTAGCGAGTGACTCCAGTATGGAT ?CBB>=BBB:@B;>><<><@>@?7 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1385:14548#0/1 4 * 0 0 * * 0 0 TTCCTATTCAACTCCTCAATT 8=9=AA:A@=?AA@@@:@?AA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1387:6982#0/1 4 * 0 0 * * 0 0 ATTCGCGCCGATTGGGATTTTTG 6B?>:A=:A?7:A>B=B=BB XM:i:0
+GEN-SEQ-ANA_0001:1:1:1392:4030#0/1 4 * 0 0 * * 0 0 AGCTGAATTCGAGCCCATTGATGA ?@6C@C@>BB<<<;95BA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1403:14145#0/1 4 * 0 0 * * 0 0 ATGCTAATTCTCGTGGAACTCG CA>C=CC@CA?8???7C7@=?? XM:i:0
+GEN-SEQ-ANA_0001:1:1:1404:13576#0/1 4 * 0 0 * * 0 0 CGATAAATTCTCTGAAAATGCATA >BB=:BBBB@B@=B@BB=BBAB;B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1408:13430#0/1 4 * 0 0 * * 0 0 GAACAATAAAACCTCCTGTC @8B5@AA?@A==AAA=AA6@ XM:i:0
+GEN-SEQ-ANA_0001:1:1:1410:6288#0/1 4 * 0 0 * * 0 0 TCCGAAAACCAAATTTTACTG 9?>B5:CBC:@C@CCB?CCCB XM:i:0
+GEN-SEQ-ANA_0001:1:1:1420:17657#0/1 4 * 0 0 * * 0 0 TTCAATGAGAGAATCCGCGACGAAAATGG CC?CCC?BB?C:CAAB5AB>BB>BA8> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1423:18328#0/1 4 * 0 0 * * 0 0 ATAAATTCATCGACAGGTACCTTCTAT <@@<>?B>>AB=>BBB>B=BBBA7>>6 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1426:4162#0/1 4 * 0 0 * * 0 0 GCTAATTCCTATGGTTATAACTCTAAAACGGGTC 7==8<>@9B@?=5ACACA>>BCC?>C=?B>9@8; XM:i:0
+GEN-SEQ-ANA_0001:1:1:1426:18217#0/1 4 * 0 0 * * 0 0 TCCTTCAAAAATCATCACTGAAA >7:>@>@AA=@A:A@6@7A;>55 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1428:17122#0/1 4 * 0 0 * * 0 0 GTACAAATTCATCTAATGGTCTAACTTTATA C?CC8CCCBA@@=C>AAABC8BC:CCBACCC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1434:13892#0/1 4 * 0 0 * * 0 0 GTACAAATTCGCCAAGCGTTGGATTG A???88@:;=>=?@@>BBB6A>AA5> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1441:16403#0/1 4 * 0 0 * * 0 0 TGAATTCTGGCACTTCGGCGATT B>B@?BBB@@>BAAB: XM:i:0
+GEN-SEQ-ANA_0001:1:1:1445:8376#0/1 4 * 0 0 * * 0 0 TGAATATGAAATTCGTAAATAGCGTGTT @@CCC@>CBCC?CCC@BBBBBBBB> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1451:7415#0/1 4 * 0 0 * * 0 0 CTCTTAATTCAAGTCCATATTCTCCTTGTAACCCCAAGTGATACAATATTTGATT BBA@;CCAC>@@CC>CBCC?BCCBBCAAAABCCCBCCCBC@CACAC;CC< XM:i:0
+GEN-SEQ-ANA_0001:1:1:1455:18050#0/1 4 * 0 0 * * 0 0 AGCTGAATTCCAGACATTATAGAAAA BCCCC@=B@>>?C>CCCC?:CC:C=> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1455:12928#0/1 4 * 0 0 * * 0 0 TCGAGAATTCCACATCACTTCTAT @?@@;CCCC@B:C:CBBB=6 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1456:13493#0/1 4 * 0 0 * * 0 0 TTCATATTGGGTTTCTACCGCGAC AB>B=?AA>B@BB>=BAAB=BBB> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1456:4776#0/1 4 * 0 0 * * 0 0 ATGCTAATTCACATTCATGATT CCCA=A@C@@@C@C5C???BCCBCCC@?CC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1469:15421#0/1 4 * 0 0 * * 0 0 CGATAAATTCTGCCGGCAGAACTACGCATTT AC;>CACCB8?CCA;ACBCBBC?CCAC=CCC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1472:17572#0/1 4 * 0 0 * * 0 0 TTCTACCCGGAACTCTTAAACAGAAGCGAGGC CC>B@@:BB>>>B>BC=CBC@8>CCC=C XM:i:0
+GEN-SEQ-ANA_0001:1:1:1473:13352#0/1 4 * 0 0 * * 0 0 TCTGTGGAATTAACAATCACAATAACTAAAG C?@BC@B@@=@=@@BBBB>B?>B= XM:i:0
+GEN-SEQ-ANA_0001:1:1:1481:5276#0/1 4 * 0 0 * * 0 0 GTACAAATTCGAATGAAATCATA CCCCBBCDB@C=CDACCC=C?C> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1482:10064#0/1 4 * 0 0 * * 0 0 CGATAAATTCAATTTGAAAA CC=CCC6C@;CCCBCAB?9B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1484:2325#0/1 4 * 0 0 * * 0 0 AATTCCAACCACCATTGGAA @A@=>@C??@C7CB@B6?:? XM:i:0
+GEN-SEQ-ANA_0001:1:1:1487:6701#0/1 4 * 0 0 * * 0 0 CTCATTCATCTAGATATTTCTGATTTATCGTTGTTC @9> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1487:3645#0/1 4 * 0 0 * * 0 0 CTAATTCGTGTAATAATTGTGTT >87?@@>><9C:@7B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1488:9645#0/1 4 * 0 0 * * 0 0 ATGCTAATTCGAAGTGCGTATCTCTCGGT ACCCCC8C@>>BBB=BB=A>BBBBB6B?> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1489:3252#0/1 4 * 0 0 * * 0 0 TAAATTCTGAAAAATGTATTT @<@@B@@=?C:C>>>B@A6A8 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1492:14525#0/1 4 * 0 0 * * 0 0 AACTTCGAATATTAACAACA @7:@>@@5C@>@@@C@@?B@ XM:i:0
+GEN-SEQ-ANA_0001:1:1:1493:11409#0/1 4 * 0 0 * * 0 0 CGAAACGGGCGTTGAGGCGTTTT >=B6BBBBAB@=BB>BB XM:i:0
+GEN-SEQ-ANA_0001:1:1:1494:17779#0/1 4 * 0 0 * * 0 0 TTTTCCTGAAAAATGTTCAACCTGATTATT :;A;:?:A>A8A:A=?AAA@@:=55AA6@@ XM:i:0
+GEN-SEQ-ANA_0001:1:1:1495:12537#0/1 4 * 0 0 * * 0 0 GTACAAATTCACAATAATTA BCCCBCCBCA;CCCBB@>>8 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1495:14319#0/1 4 * 0 0 * * 0 0 ATTCGCCAAATTTGGCGTTAG CCCCACBBCB@CCCACA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1496:18187#0/1 4 * 0 0 * * 0 0 AGGCGCGGCCAGCGGCTGGG <:B<<6B6<>A?BBB:5A6A XM:i:0
+GEN-SEQ-ANA_0001:1:1:1496:10780#0/1 4 * 0 0 * * 0 0 GAAATTCCCAAGAAACCCAAG C:?7@??CACCBCCC=8AA:6 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1497:8211#0/1 4 * 0 0 * * 0 0 CGATAAATTCCAAAGTTTGTGAATTT BCCCCC?C@?<::A:?AA?AB?:B@< XM:i:0
+GEN-SEQ-ANA_0001:1:1:1498:13789#0/1 4 * 0 0 * * 0 0 ATGCTAATTCTTTCGGAAACGAGACCATCTGA >C8C>CC>B@==C@AC?C8@B?:6;AA@BBA@ XM:i:0
+GEN-SEQ-ANA_0001:1:1:1502:11211#0/1 4 * 0 0 * * 0 0 ATTCGTACTCGTGATTTTCGAACAATCCAGT @AB=CCCCCCCBBA;CACCCB@A@C==C@CA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1506:3772#0/1 4 * 0 0 * * 0 0 CCATAAAATTATTGTCAATTTTCAAT ??:@?:BC:C?CC>C=@>BCCC:: XM:i:0
+GEN-SEQ-ANA_0001:1:1:1506:13304#0/1 4 * 0 0 * * 0 0 AATTCGACATCGCAAGTGAG AAA?>?CC>CBB?CBCBCA> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1506:17897#0/1 4 * 0 0 * * 0 0 TGAATTCCGCCTTGGAGACAATGTTCT B:6>?:B=B=BB>=?BBA XM:i:0
+GEN-SEQ-ANA_0001:1:1:1507:13225#0/1 4 * 0 0 * * 0 0 TGCTAATTCTAATTTTCATGTATACTAAACCCCCAC @?C>B8@<9B?C=CB5B66A<6?A>AA8?A<BAAA=?B???6BB=> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1510:9250#0/1 4 * 0 0 * * 0 0 AAAAACTGAAAAATTAAGATTAAAAGC A=?A6AA@?AAA@@AA:A8A@:>8AA5 XM:i:0
+GEN-SEQ-ANA_0001:1:1:1510:4740#0/1 4 * 0 0 * * 0 0 CTCTTAATTCATTTTTTTTTCAGTTTTAAAGGTG BC;BBCCCC@>CC;CCB>8>=BBB6BBBB=B?=B XM:i:0
+GEN-SEQ-ANA_0001:1:1:1511:8709#0/1 4 * 0 0 * * 0 0 TTCACTTTCTATGGTTATTTT CC@=ACCCCCB?CCBBCCC=> XM:i:0
+GEN-SEQ-ANA_0001:1:1:1513:14094#0/1 4 * 0 0 * * 0 0 AGCTGAATTCCAATTCTCTATCACGATT BC8CCBCC@;CCC8CCCCCACCCAB@@@ XM:i:0
+GEN-SEQ-ANA_0001:1:1:1515:16388#0/1 4 * 0 0 * * 0 0 AGCTGAATTCGCCAAGCGTTGGAT BC;CCC@C?;=CCCCCCC@C;C@C XM:i:0
+GEN-SEQ-ANA_0001:1:1:1516:14913#0/1 4 * 0 0 * * 0 0 CTAATTCGTCCAGATTTCCA C:BCCB?CCBBACCBBCCCC XM:i:0
+GEN-SEQ-ANA_0001:1:1:1517:17176#0/1 4 * 0 0 * * 0 0 CGATAAATTCTAATTTTGCCA >C<@CBA9B?BB?CBCABCCB XM:i:0
+GEN-SEQ-ANA_0001:1:1:1520:6860#0/1 4 * 0 0 * * 0 0 GTACAAATTCTTGAGAGACAGAATT C??C;CCCC?CCCCCCC@BC@C?AC XM:i:0
diff -r 71eaf2e85ae7 -r f0d19a935325 test-data/testout.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/testout.txt Tue Nov 20 16:01:20 2012 -0500
@@ -0,0 +1,29 @@
+reads 100
+version 1.32
+mapped reads 11
+mapped bases 377
+phred 33
+forward 0
+reverse 11
+len max 48
+len mean 34.2727
+len stdev 9.4137
+mapq mean 255.0000
+mapq stdev 0.0000
+mapq Q1 255.00
+mapq median 255.00
+mapq Q3 255.00
+snp rate 0.000000
+pct mismatch 0.0000
+base qual mean 31.5650
+base qual stdev 3.2627
+%A 27.5862
+%C 34.4828
+%G 16.7109
+%T 21.2202
+%PCRPRIMER1 8.753316
+%PCRPRIMER1+1 45.358090
+%PCRPRIMER1+2 9.814324
+%PCRPRIMER2+1 36.074271
+num ref seqs 4
+num ref aligned 4