Next changeset 1:4cd611004dc4 (2015-11-11) |
Commit message:
Imported from capsule None |
added:
pgSnp2gd_snp.pl pgSnp2gd_snp.xml test-data/pgSnp2snp_output.txt test-data/pgSnpTest.ref.txt |
b |
diff -r 000000000000 -r d281062566f9 pgSnp2gd_snp.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pgSnp2gd_snp.pl Mon Jul 28 11:55:36 2014 -0400 |
[ |
#!/usr/bin/perl -w
use strict;
use warnings;

#convert from pgSnp file to snp table (Webb format?)

#snp table format:
#1. chr
#2. position (0 based)
#3. ref allele
#4. second allele
#5. overall quality
#foreach individual (6-9, 10-13, ...)
#a. count of allele in 3
#b. count of allele in 4
#c. genotype call (-1, or count of ref allele)
#d. quality of genotype call (quality of non-ref allele from masterVar)

#pgSnp fields as read below (0 based, after any leading bin column is dropped):
#0 chrom, 1 start, 2 end, 3 allele(s) separated by "/", 4 allele count,
#5 comma separated allele frequencies, 6 comma separated allele scores,
#and the reference allele in the column given by -ref

if (!@ARGV) {
   #usage and errors go to STDERR with a non-zero status so that they never
   #end up inside the redirected output table
   print STDERR "usage: pgSnp2gd_snp.pl file.pgSnp[.gz|.bz2] [-tab=snpTable.txt -addColsOnly -build=hg19 -name=na -ref=#1based -chr=#1based ] > newSnpTable.txt\n";
   exit 1;
}

my $in = shift @ARGV;
my $tab;        #existing snp table to append to
my $tabOnly;    #only add columns to SNPs already in the table
my $build;
my $name;
my $ref;        #0 based index of the reference allele column
my $binChr = 1; #position of chrom column, indicates if bin is added
foreach my $arg (@ARGV) {
   #anchored so that an option value can never be mistaken for another option
   if ($arg =~ /^-tab=(.*)/) { $tab = $1; }
   elsif ($arg =~ /^-addColsOnly/) { $tabOnly = 1; }
   elsif ($arg =~ /^-build=(.*)/) { $build = $1; }
   elsif ($arg =~ /^-name=(.*)/) { $name = $1; }
   elsif ($arg =~ /^-ref=(\d+)/) { $ref = $1 - 1; } #go to index
   elsif ($arg =~ /^-chr=(\d+)/) { $binChr = $1; }
}

if ($binChr == 2 && defined $ref) { $ref--; } #shift over by 1, we will delete bin
#the reference column is needed unless we are only adding columns to a table
#an index below 1 is rejected too: -ref=0 used to become -1 (the last column)
if ((!$tab or !$tabOnly) && (!defined $ref or $ref < 1)) {
   die "Error the reference allele must be in a column in the file if not just adding to a previous SNP table.\n";
}

#WARNING loads snp table in memory, this could take > 1G ram
my %old;        #"chr:pos" => full row of the old table, '' once printed
my $colcnt = 0; #number of columns in the old table
my @head;       #header lines (start with #) of the old table
if ($tab) {
   open(my $tabFh, '<', $tab) or die "Couldn't open $tab, $!\n";
   while (my $line = <$tabFh>) {
      chomp $line;
      if ($line =~ /^#/) { push(@head, $line); next; }
      #a blank line would reset the column count and add a bogus ":" row
      if ($line =~ /^\s*$/) { next; }
      my @f = split(/\t/, $line);
      $old{"$f[0]:$f[1]"} = join("\t", @f);
      $colcnt = scalar @f;
   }
   close $tabFh or die "Couldn't close $tab, $!\n";
}

#list form of the pipe open keeps the file name away from the shell
my $inFh;
if ($in =~ /\.gz$/) {
   open($inFh, '-|', 'zcat', $in) or die "Couldn't open $in, $!\n";
}elsif ($in =~ /\.bz2$/) {
   open($inFh, '-|', 'bzcat', $in) or die "Couldn't open $in, $!\n";
}else {
   open($inFh, '<', $in) or die "Couldn't open $in, $!\n";
}
prepHeader();
if (@head) { #keep old header, add new?
   print join("\n", @head), "\n";
}
while (my $line = <$inFh>) {
   chomp $line;
   if ($line =~ /^#/) { next; }
   if ($line =~ /^\s*$/) { next; }
   my @f = split(/\t/, $line);
   if ($binChr == 2) { #must have a bin column prepended on the beginning
      shift @f; #delete it
   }
   if (!$f[3]) { next; } #WHAT? most likely still zipped?
   if ($f[4] > 2) { next; } #can only do cases of 2 alleles
   if ($f[2] - $f[1] != 1) { next; } #no indels
   if ($f[3] =~ /-/) { next; } #no indels
   my $key = "$f[0]:$f[1]";
   if (%old && $old{$key}) {
      #SNP is already in the table, its alleles come from the old row
      my @o = split(/\t/, $old{$key});
      my @cols = callColumns(\@f, $o[2], $o[3]);
      if (!@cols) { next; } #doesn't fit the recorded alleles, skip
      my ($freq, $freq2, $g, $sc) = @cols;
      #print old
      print $old{$key};
      #add new columns
      print "\t$freq\t$freq2\t$g\t$sc\n";
      $old{$key} = ''; #mark as printed
   }elsif (!$tabOnly) { #new table, or don't have this SNP
      #if creating a new row need the reference allele in a column
      my $refAllele = $f[$ref];
      if (!defined $refAllele or $refAllele eq '') { next; } #no reference allele
      my @cols = callColumns(\@f, $refAllele, undef);
      if (!@cols) { next; } #reference is not one of the alleles
      my ($freq, $freq2, $g, $sc, $alt) = @cols;
      #print initial columns
      print "$f[0]\t$f[1]\t$refAllele\t$alt\t-1";
      #pad for other individuals if needed
      for (my $i = 5; $i < $colcnt; $i += 4) {
         print "\t-1\t-1\t-1\t-1";
      }
      #add new columns
      print "\t$freq\t$freq2\t$g\t$sc\n";
   }
}
close $inFh or die "Couldn't close $in, $!\n";

#if adding to a snp table, now we need to finish those not in the latest set
foreach my $k (keys %old) {
   if ($old{$k} ne '') { #not printed yet
      print $old{$k}, "\t-1\t-1\t-1\t-1\n"; #plus blank for this one
   }
}

exit;

#work out the per individual columns for one pgSnp row
#arguments:
#  $f         reference to the array of pgSnp fields for the row
#  $refAllele reference allele of the SNP
#  $knownAlt  second allele already recorded for the SNP, undef for a new row
#returns (count of ref allele, count of second allele, genotype, quality,
#second allele), or an empty list when the row does not fit the alleles
sub callColumns {
   my ($f, $refAllele, $knownAlt) = @_;
   if ($f->[4] == 1) { #should be homozygous
      if ($f->[3] eq $refAllele) {
         #a new row has no second allele to report, use N
         return ($f->[5], 0, 2, $f->[6], defined($knownAlt) ? $knownAlt : 'N');
      }
      if (defined($knownAlt) && $f->[3] ne $knownAlt) { return; } #matches neither
      return (0, $f->[5], 0, $f->[6], $f->[3]); #matches alternate
   }
   my @all = split(/\//, $f->[3]);
   #exact comparison, the alleles are data and must not be used as a pattern
   if ($refAllele ne $all[0] && $refAllele ne $all[1]) { return; } #must match one
   if (defined($knownAlt) && $knownAlt ne $all[0] && $knownAlt ne $all[1]) { return; }
   #indexes of the ref and second allele in the allele, freq and score lists
   my ($refIdx, $altIdx) = ($refAllele eq $all[1]) ? (1, 0) : (0, 1);
   my @fr = split(/,/, $f->[5]);
   my @s = split(/,/, $f->[6]);
   #genotype is the count of ref alleles, quality is that of the second allele
   return ($fr[$refIdx], $fr[$altIdx], 1, $s[$altIdx], $all[$altIdx]);
}

#parse old header and add or create new
#reads and replaces the file level @head, reads $colcnt and fills in the
#defaults for $build and $name
sub prepHeader {
   if (!$build) { $build = 'hg19'; } #set default
   my @cnames;
   my @ind;
   my $n; #number used to prefix the column names of the new individual
   if (@head) { #parse previous header
      my $h = join("", @head); #may split between lines
      if ($h =~ /"column_names":\[(.*?)\]/) {
         my @t = split(/,/, $1);
         foreach (@t) { s/"//g; }
         @cnames = @t;
         #last column looks like "3Q", the new individual is one higher
         $n = $cnames[$#cnames];
         $n =~ s/Q//;
         $n++;
      }
      if ($h =~ /"dbkey":"(.*?)"/) { $build = $1; }
      if ($h =~ /"individuals":\[(.*)\]/) {
         my $t = $1;
         $t =~ s/\]\].*/]/; #remove if there is more categories
         @ind = split(/,/, $t);
      }
   }else { #start new header
      @cnames = ("chr", "pos", "A", "B", "Q");
      $n = 1;
   }
   #add current
   if (!$name) { $name= 'na'; }
   my $stcol = $colcnt + 1;
   if ($stcol == 1) { $stcol = 6; } #move past initial columns
   push(@ind, "[\"$name\",$stcol]");
   push(@cnames, "${n}A", "${n}B", "${n}G", "${n}Q");
   #reassign head
   undef @head;
   foreach (@cnames) { $_ = "\"$_\""; } #quote name
   $head[0] = "#{\"column_names\":[" . join(",", @cnames) . "],";
   $head[1] = "#\"individuals\":[" . join(",", @ind) . "],";
   $head[2] = "#\"dbkey\":\"$build\",\"pos\":2,\"rPos\":2,\"ref\":1,\"scaffold\":1,\"species\":\"$build\"}";
}
####End
b |
diff -r 000000000000 -r d281062566f9 pgSnp2gd_snp.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pgSnp2gd_snp.xml Mon Jul 28 11:55:36 2014 -0400 |
b |
<tool id="pgSnp2gd_snp" name="pgSnp to gd_snp" hidden="false" version="1.0.0">
  <description>Convert from pgSnp to gd_snp</description>
  <!--
    Builds one of three pgSnp2gd_snp.pl command lines:
      * append to a gd_snp table, only adding columns to SNPs already present
      * append to a gd_snp table, also adding SNPs new to it (needs -ref)
      * start a new gd_snp table (needs -ref)
    Parameters that live inside a conditional are referenced through the
    conditional's name (snptab, snptab.needRef).
  -->
  <command interpreter="perl">
    #if $snptab.tab2 == "yes"
      #if $snptab.needRef.colsOnly == "addColsOnly" #pgSnp2gd_snp.pl $input1 -tab=$snptab.input2 -name=$indName -build=${input1.metadata.dbkey} -addColsOnly -chr=${input1.metadata.chromCol} > $out_file1
      #else #pgSnp2gd_snp.pl $input1 -tab=$snptab.input2 -name=$indName -build=${input1.metadata.dbkey} -ref=${snptab.needRef.ref} -chr=${input1.metadata.chromCol} > $out_file1
      #end if
    #else #pgSnp2gd_snp.pl $input1 -name=$indName -build=${input1.metadata.dbkey} -ref=${snptab.ref} -chr=${input1.metadata.chromCol} > $out_file1
    #end if
  </command>
  <inputs>
    <param format="tab" name="input1" type="data" label="pgSnp dataset" />
    <conditional name="snptab">
      <param name="tab2" type="select" label="Append to gd_snp dataset in history">
        <option value="yes">yes</option>
        <option value="no" selected="true">no</option>
      </param>
      <when value="yes">
        <param format="gd_snp" name="input2" type="data" label="gd_snp dataset" />
        <conditional name="needRef">
          <param name="colsOnly" type="select" label="Skip new SNPs">
            <option value="no" selected="true">no</option>
            <option value="addColsOnly">yes</option>
          </param>
          <when value="no">
            <param name="ref" type="data_column" data_ref="input1" label="Column with reference allele" />
          </when>
          <when value="addColsOnly"> <!-- do nothing -->
          </when>
        </conditional>
      </when>
      <when value="no">
        <param name="ref" type="data_column" data_ref="input1" label="Column with reference allele" />
      </when>
    </conditional>
    <param name="indName" type="text" size="20" label="Label for new individual/group" value="na" />
  </inputs>
  <outputs>
    <data format="gd_snp" name="out_file1" />
  </outputs>
  <tests>
    <test>
      <param name='input1' value='pgSnpTest.ref.txt' ftype='interval' />
      <param name='tab2' value='no' />
      <param name='ref' value='8' />
      <param name='indName' value='na' />
      <!-- the name must be that of the declared output dataset -->
      <output name="out_file1" file="pgSnp2snp_output.txt" />
    </test>
  </tests>

  <help>

**Dataset formats**

The input dataset is of Galaxy datatype interval_, with the additional columns
required for pgSnp_ format.
Any further columns beyond those defined for pgSnp will be ignored.
The output dataset is a gd_snp_ table.  (`Dataset missing?`_)

.. _interval: ./static/formatHelp.html#interval
.. _pgSnp: ./static/formatHelp.html#pgSnp
.. _gd_snp: ./static/formatHelp.html#gd_snp
.. _Dataset missing?: ./static/formatHelp.html

-----

**What it does**

This tool converts a pgSnp dataset to gd_snp format, either starting a new
dataset or appending to an old one.  When appending,
if any new SNPs appear only in the pgSnp file they can either be skipped entirely, or
backfilled with "-1" (meaning "unknown") for previous individuals/groups in the
input gd_snp dataset.
If any new SNPs are being added (either by creating a new table or by backfilling),
then an extra column with the reference allele must be supplied in the pgSnp dataset,
as shown in the example below.

-----

**Example**

- input pgSnp file, with reference allele added::

    chr1    1888681    1888682    C/T    2    4,3      0.8893,0.8453    T
    chr1    3118325    3118326    T      1    8        0.8796           C
    chr1    3211457    3211458    A/C    2    17,10    0.8610,0.8576    A
    etc.

- gd_snp output::

    chr1    1888681    T    C    -1    3     4     1    0.8893
    chr1    3118325    C    T    -1    0     8     0    0.8796
    chr1    3211457    A    C    -1    17    10    1    0.8576
    etc.

</help>
</tool>
b |
diff -r 000000000000 -r d281062566f9 test-data/pgSnp2snp_output.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pgSnp2snp_output.txt Mon Jul 28 11:55:36 2014 -0400 |
[ |
b'@@ -0,0 +1,10002 @@\n+#{"column_names":["chr","pos","A","B","Q","1A","1B","1G","1Q"],\n+#"individuals":[["na",6]],\n+#"dbkey":"hg19","pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"}\n+chr1\t1888681\tT\tC\t-1\t3\t4\t1\t0.8893\n+chr1\t3118325\tC\tT\t-1\t0\t8\t0\t0.8796\n+chr1\t3211457\tA\tC\t-1\t17\t10\t1\t0.8576\n+chr1\t3222692\tG\tA\t-1\t10\t14\t1\t0.8694\n+chr1\t3281273\tG\tA\t-1\t5\t4\t1\t0.8210\n+chr1\t3630475\tC\tT\t-1\t12\t12\t1\t0.8430\n+chr1\t4599834\tC\tT\t-1\t0\t18\t0\t0.8797\n+chr1\t5287107\tT\tG\t-1\t0\t6\t0\t0.8443\n+chr1\t5307765\tC\tT\t-1\t22\t12\t1\t0.8505\n+chr1\t5341621\tA\tC\t-1\t5\t3\t1\t0.9001\n+chr1\t5478586\tA\tG\t-1\t0\t11\t0\t0.8997\n+chr1\t5680961\tA\tC\t-1\t16\t10\t1\t0.8135\n+chr1\t5736051\tG\tA\t-1\t34\t57\t1\t0.8655\n+chr1\t5831207\tC\tT\t-1\t5\t10\t1\t0.8575\n+chr1\t6200884\tA\tC\t-1\t0\t13\t0\t0.8346\n+chr1\t6307254\tT\tC\t-1\t0\t11\t0\t0.8723\n+chr1\t6621228\tG\tT\t-1\t10\t11\t1\t0.8336\n+chr1\t7094424\tG\tA\t-1\t0\t4\t0\t0.8578\n+chr1\t7392593\tC\tT\t-1\t15\t25\t1\t0.8644\n+chr1\t7500089\tC\tG\t-1\t0\t13\t0\t0.8508\n+chr1\t7911301\tC\tT\t-1\t8\t9\t1\t0.8736\n+chr1\t7954646\tC\tT\t-1\t12\t8\t1\t0.8490\n+chr1\t8027386\tC\tT\t-1\t15\t21\t1\t0.8528\n+chr1\t8549302\tC\tT\t-1\t8\t7\t1\t0.8633\n+chr1\t8932179\tG\tT\t-1\t13\t12\t1\t0.8594\n+chr1\t9016389\tC\tT\t-1\t0\t14\t0\t0.8590\n+chr1\t9494443\tC\tT\t-1\t0\t6\t0\t0.8567\n+chr1\t10615774\tT\tC\t-1\t0\t15\t0\t0.8684\n+chr1\t10622975\tC\tT\t-1\t0\t4\t0\t0.8001\n+chr1\t10782207\tC\tT\t-1\t0\t3\t0\t0.8402\n+chr1\t12238146\tG\tA\t-1\t9\t4\t1\t0.8560\n+chr1\t12429037\tA\tG\t-1\t8\t6\t1\t0.8863\n+chr1\t12669902\tT\tC\t-1\t24\t10\t1\t0.8495\n+chr1\t13189040\tA\tT\t-1\t8\t4\t1\t0.8868\n+chr1\t14208570\tG\tA\t-1\t8\t10\t1\t0.8646\n+chr1\t14501554\tG\tA\t-1\t26\t17\t1\t0.8646\n+chr1\t15063865\tG\tA\t-1\t18\t9\t1\t0.8571\n+chr1\t15286095\tA\tG\t-1\t0\t21\t0\t0.8600\n+chr1\t15441925\tC\tT\t-1\t0\t17\t0\t0.8402\n+chr1\t15712178\tG\tA\t-1\t0\t14\t0\t0.8694\n+chr1\t16207687\tC\tG\t-1\t0\t16\t0\t
0.8512\n+chr1\t16314956\tT\tA\t-1\t0\t31\t0\t0.8639\n+chr1\t16476823\tC\tT\t-1\t5\t8\t1\t0.8898\n+chr1\t16730581\tC\tG\t-1\t0\t9\t0\t0.8232\n+chr1\t17053721\tG\tT\t-1\t0\t66\t0\t0.8916\n+chr1\t17053765\tG\tA\t-1\t11\t19\t1\t0.8246\n+chr1\t17408311\tG\tC\t-1\t0\t22\t0\t0.8786\n+chr1\t17498098\tT\tC\t-1\t0\t3\t0\t0.7863\n+chr1\t17715847\tG\tA\t-1\t0\t11\t0\t0.8438\n+chr1\t18857430\tC\tT\t-1\t17\t14\t1\t0.8586\n+chr1\t19092742\tG\tT\t-1\t16\t10\t1\t0.8678\n+chr1\t19525584\tT\tC\t-1\t0\t12\t0\t0.8614\n+chr1\t19921477\tC\tA\t-1\t4\t3\t1\t0.8781\n+chr1\t20044146\tC\tT\t-1\t5\t6\t1\t0.8847\n+chr1\t20269168\tT\tC\t-1\t10\t5\t1\t0.8936\n+chr1\t20947809\tC\tT\t-1\t0\t8\t0\t0.8416\n+chr1\t21135182\tC\tA\t-1\t0\t13\t0\t0.8384\n+chr1\t21941643\tG\tT\t-1\t0\t12\t0\t0.8389\n+chr1\t22324976\tC\tT\t-1\t9\t6\t1\t0.8158\n+chr1\t22708791\tT\tA\t-1\t12\t8\t1\t0.8313\n+chr1\t22904350\tA\tG\t-1\t10\t4\t1\t0.8523\n+chr1\t22928348\tA\tG\t-1\t0\t7\t0\t0.8629\n+chr1\t23039691\tG\tC\t-1\t0\t13\t0\t0.8484\n+chr1\t24392068\tC\tT\t-1\t0\t4\t0\t0.8425\n+chr1\t24851230\tA\tG\t-1\t19\t15\t1\t0.8807\n+chr1\t25158007\tA\tG\t-1\t7\t12\t1\t0.8625\n+chr1\t25250192\tC\tT\t-1\t5\t4\t1\t0.8827\n+chr1\t25349613\tA\tG\t-1\t0\t6\t0\t0.8963\n+chr1\t25891773\tC\tT\t-1\t9\t7\t1\t0.8869\n+chr1\t25989060\tG\tA\t-1\t5\t14\t1\t0.8271\n+chr1\t25996483\tA\tG\t-1\t25\t17\t1\t0.7880\n+chr1\t26172110\tA\tG\t-1\t24\t13\t1\t0.8740\n+chr1\t26275739\tT\tC\t-1\t16\t10\t1\t0.8335\n+chr1\t26436230\tC\tA\t-1\t14\t18\t1\t0.8299\n+chr1\t27081694\tG\tA\t-1\t10\t7\t1\t0.8297\n+chr1\t27215078\tT\tC\t-1\t5\t9\t1\t0.8749\n+chr1\t27527485\tA\tG\t-1\t3\t5\t1\t0.8867\n+chr1\t28266702\tC\tT\t-1\t4\t5\t1\t0.8812\n+chr1\t28273015\tT\tG\t-1\t0\t23\t0\t0.7710\n+chr1\t28548057\tG\tC\t-1\t0\t22\t0\t0.8518\n+chr1\t28817596\tA\tG\t-1\t0\t17\t0\t0.8790\n+chr1\t28833437\tG\tT\t-1\t3\t5\t1\t0.8702\n+chr1\t28848587\tG\tT\t-1\t11\t17\t1\t0.8546\n+chr1\t29140084\tC\tA\t-1\t10\t8\t1\t0.8493\n+chr1\t29788803\tT\tC\t-1\t0\t3\t0\t0.8525\n+chr1\t30086425\tA\t
G\t-1\t15\t14\t1\t0.8948\n+chr1\t30214408\tG\tA\t-1\t0\t36\t0\t0.8861\n+chr1\t30311437\tC\tT\t-1\t0\t17\t0\t0.8655\n+chr1\t30577637\tG\tA\t-1\t0\t12\t0\t0.8266\n+chr1\t30764437\tC\tA\t-1\t0\t37\t0\t0.8368\n+chr1\t30830488\tC\tT\t-1\t11\t7\t1\t0.8011\n+chr1\t30999686\tC\tA\t-1\t0\t23\t0\t0.8255\n+chr1\t31004884\tA\tG\t-1\t0\t10\t0\t0.8221\n+chr1\t31082488\tG\tC\t-1\t13\t11\t1\t0.8683\n+chr1\t31084258\tT\tC\t-1\t10\t9\t1\t0.8195\n+chr1\t31145761\tT\tC\t-1\t0\t40\t0\t0.8697\n+chr1\t31858900\tT\tC\t-1\t0\t20\t0\t0.8667\n+chr1\t31950701\tG\tA\t-1\t12\t10\t1\t0.8389\n+chr1\t32334351\tT\tC\t-1\t0\t8\t0\t0.8713\n+chr1\t32689520\tA\tG\t-1\t21\t14\t1\t0.8584\n+chr1\t34054228\tA\tG\t-1\t0\t25\t0\t0.8579\n+chr1\t34137111\tA\tT\t-1\t0\t19\t0\t0.8811\n+chr1\t34255271\tT\tC\t-1\t21\t8\t1\t0.8586\n+chr1\t35503106\tT\tG\t-1\t30\t17\t1\t0.8577\n+chr1\t35568011\tT\tC\t-1\t0\t25\t0\t0.8800\n+chr1\t35594503\tC\tA\t-1\t3\t6\t1\t0.8923\n+chr1\t37411518\tG\tA\t-1\t6\t10\t1\t0.'..b'5\tA\tT\t-1\t6\t3\t1\t0.7937\n+chrX\t100557633\tT\tC\t-1\t0\t10\t0\t0.8602\n+chrX\t100963372\tT\tC\t-1\t0\t10\t0\t0.8248\n+chrX\t103117581\tC\tT\t-1\t0\t13\t0\t0.8609\n+chrX\t103862864\tT\tC\t-1\t0\t13\t0\t0.8374\n+chrX\t106721925\tC\tG\t-1\t0\t13\t0\t0.8762\n+chrX\t109408982\tT\tC\t-1\t0\t10\t0\t0.8833\n+chrX\t109754636\tT\tC\t-1\t0\t17\t0\t0.8492\n+chrX\t109761593\tA\tT\t-1\t0\t11\t0\t0.8431\n+chrX\t109907481\tC\tT\t-1\t0\t9\t0\t0.8497\n+chrX\t110015919\tC\tT\t-1\t0\t15\t0\t0.8467\n+chrX\t110264993\tA\tT\t-1\t0\t16\t0\t0.8362\n+chrX\t111281679\tG\tA\t-1\t0\t13\t0\t0.8744\n+chrX\t111364460\tA\tG\t-1\t0\t10\t0\t0.8721\n+chrX\t111368560\tG\tA\t-1\t0\t16\t0\t0.8444\n+chrX\t111531573\tC\tT\t-1\t0\t10\t0\t0.8944\n+chrX\t111720465\tG\tA\t-1\t0\t14\t0\t0.8430\n+chrX\t112170053\tA\tG\t-1\t0\t3\t0\t0.9248\n+chrX\t112459169\tG\tA\t-1\t0\t19\t0\t0.8654\n+chrX\t113279524\tA\tT\t-1\t0\t20\t0\t0.8246\n+chrX\t113324217\tA\tC\t-1\t0\t9\t0\t0.8407\n+chrX\t114056797\tT\tC\t-1\t0\t11\t0\t0.8067\n+chrX\t115018183\tA\tG\t-1\t0\t13\t
0\t0.8356\n+chrX\t115733503\tG\tT\t-1\t0\t7\t0\t0.8198\n+chrX\t116139363\tA\tG\t-1\t0\t7\t0\t0.9105\n+chrX\t116488679\tA\tG\t-1\t0\t14\t0\t0.8564\n+chrX\t117358634\tG\tA\t-1\t0\t8\t0\t0.8613\n+chrX\t118498954\tT\tC\t-1\t0\t4\t0\t0.7954\n+chrX\t118648794\tG\tT\t-1\t0\t14\t0\t0.8734\n+chrX\t118693641\tC\tT\t-1\t0\t6\t0\t0.8303\n+chrX\t118869589\tA\tG\t-1\t0\t8\t0\t0.9096\n+chrX\t120306948\tA\tG\t-1\t0\t12\t0\t0.8546\n+chrX\t120448877\tC\tA\t-1\t0\t25\t0\t0.8910\n+chrX\t120872985\tC\tT\t-1\t0\t8\t0\t0.8337\n+chrX\t121680281\tT\tC\t-1\t0\t12\t0\t0.8836\n+chrX\t122435686\tC\tT\t-1\t0\t12\t0\t0.7835\n+chrX\t123397991\tG\tA\t-1\t0\t4\t0\t0.8356\n+chrX\t123907161\tT\tC\t-1\t0\t7\t0\t0.8753\n+chrX\t124719518\tT\tC\t-1\t0\t14\t0\t0.8631\n+chrX\t124768222\tC\tT\t-1\t0\t5\t0\t0.8839\n+chrX\t125632363\tT\tC\t-1\t0\t10\t0\t0.8800\n+chrX\t126053340\tC\tA\t-1\t0\t10\t0\t0.8392\n+chrX\t126195634\tG\tA\t-1\t0\t7\t0\t0.8718\n+chrX\t126716246\tT\tC\t-1\t0\t6\t0\t0.8763\n+chrX\t127367145\tG\tA\t-1\t0\t17\t0\t0.8456\n+chrX\t127457582\tC\tG\t-1\t0\t8\t0\t0.8587\n+chrX\t128605962\tC\tT\t-1\t0\t20\t0\t0.8591\n+chrX\t129569189\tG\tC\t-1\t0\t17\t0\t0.8282\n+chrX\t129811001\tC\tG\t-1\t0\t20\t0\t0.8687\n+chrX\t130409883\tA\tT\t-1\t0\t17\t0\t0.8126\n+chrX\t132344700\tT\tC\t-1\t0\t3\t0\t0.8623\n+chrX\t132915772\tC\tA\t-1\t0\t15\t0\t0.8894\n+chrX\t133907790\tC\tA\t-1\t0\t12\t0\t0.8533\n+chrX\t134518384\tA\tC\t-1\t0\t9\t0\t0.8196\n+chrX\t134766489\tA\tG\t-1\t15\t9\t1\t0.8127\n+chrX\t134985459\tA\tG\t-1\t0\t3\t0\t0.8540\n+chrX\t135190793\tA\tG\t-1\t0\t17\t0\t0.8961\n+chrX\t136655716\tT\tG\t-1\t0\t6\t0\t0.8716\n+chrX\t136733809\tT\tC\t-1\t0\t15\t0\t0.8669\n+chrX\t137469231\tT\tC\t-1\t0\t18\t0\t0.8722\n+chrX\t137852981\tG\tT\t-1\t0\t11\t0\t0.8276\n+chrX\t139003000\tG\tA\t-1\t0\t13\t0\t0.8526\n+chrX\t139110302\tG\tA\t-1\t0\t9\t0\t0.8564\n+chrX\t139529487\tT\tC\t-1\t0\t18\t0\t0.8521\n+chrX\t139531472\tC\tT\t-1\t0\t11\t0\t0.8682\n+chrX\t139615864\tA\tG\t-1\t0\t9\t0\t0.8202\n+chrX\t139702889\tT\tC\t-1\t0\
t3\t0\t0.8590\n+chrX\t140485622\tC\tG\t-1\t0\t3\t0\t0.8489\n+chrX\t140511684\tA\tG\t-1\t0\t13\t0\t0.8238\n+chrX\t140743414\tT\tC\t-1\t0\t4\t0\t0.8914\n+chrX\t141082368\tT\tC\t-1\t0\t12\t0\t0.8648\n+chrX\t141153066\tC\tG\t-1\t0\t7\t0\t0.8369\n+chrX\t141213396\tC\tA\t-1\t0\t4\t0\t0.8604\n+chrX\t142194135\tG\tA\t-1\t0\t10\t0\t0.8411\n+chrX\t142652846\tC\tT\t-1\t0\t19\t0\t0.8740\n+chrX\t142688444\tC\tA\t-1\t0\t6\t0\t0.8866\n+chrX\t142756403\tT\tC\t-1\t0\t18\t0\t0.8646\n+chrX\t144873543\tT\tC\t-1\t0\t12\t0\t0.8420\n+chrX\t145351956\tC\tT\t-1\t0\t5\t0\t0.8467\n+chrX\t145881542\tT\tG\t-1\t0\t3\t0\t0.8178\n+chrX\t145924332\tA\tG\t-1\t0\t9\t0\t0.8605\n+chrX\t145935548\tA\tC\t-1\t0\t14\t0\t0.7743\n+chrX\t146079383\tA\tC\t-1\t0\t3\t0\t0.8542\n+chrX\t146425710\tA\tG\t-1\t0\t9\t0\t0.8032\n+chrX\t146613499\tA\tC\t-1\t0\t11\t0\t0.8771\n+chrX\t147002991\tT\tG\t-1\t0\t4\t0\t0.8704\n+chrX\t149466820\tC\tT\t-1\t0\t15\t0\t0.8550\n+chrX\t149603761\tC\tA\t-1\t0\t3\t0\t0.8279\n+chrX\t150053854\tA\tC\t-1\t0\t11\t0\t0.8358\n+chrX\t150408360\tC\tT\t-1\t0\t11\t0\t0.8165\n+chrX\t150451895\tG\tA\t-1\t0\t23\t0\t0.8775\n+chrX\t150777182\tC\tT\t-1\t0\t16\t0\t0.8623\n+chrX\t150808405\tG\tT\t-1\t0\t15\t0\t0.8428\n+chrX\t150923430\tT\tG\t-1\t3\t3\t1\t0.8295\n+chrX\t150989778\tG\tC\t-1\t0\t8\t0\t0.8441\n+chrX\t152584270\tA\tG\t-1\t0\t7\t0\t0.8157\n+chrX\t152845290\tC\tT\t-1\t0\t12\t0\t0.8522\n+chrX\t153055432\tG\tA\t-1\t0\t17\t0\t0.8461\n+chrX\t153090059\tA\tG\t-1\t0\t3\t0\t0.8010\n+chrY\t4223441\tA\tG\t-1\t0\t8\t0\t0.7722\n+chrY\t9939199\tC\tT\t-1\t20\t13\t1\t0.8191\n+chrY\t9962146\tT\tG\t-1\t5\t3\t1\t0.8341\n+chrY\t9991556\tT\tG\t-1\t22\t31\t1\t0.8248\n+chrY\t10050627\tT\tC\t-1\t5\t4\t1\t0.8047\n+chrY\t10068141\tA\tT\t-1\t15\t20\t1\t0.8232\n+chrY\t13195465\tG\tT\t-1\t8\t8\t1\t0.8215\n+chrY\t13295972\tC\tT\t-1\t16\t10\t1\t0.8173\n+chrY\t13567253\tA\tC\t-1\t5\t6\t1\t0.8351\n+chrY\t18619115\tA\tC\t-1\t0\t4\t0\t0.8518\n+chrY\t59026174\tA\tG\t-1\t5\t13\t1\t0.8404\n' |
b |
diff -r 000000000000 -r d281062566f9 test-data/pgSnpTest.ref.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pgSnpTest.ref.txt Mon Jul 28 11:55:36 2014 -0400 |
b |
b'@@ -0,0 +1,10000 @@\n+chr1\t1888681\t1888682\tC/T\t2\t4,3\t0.8893,0.8453\tT\n+chr1\t3118325\t3118326\tT\t1\t8\t0.8796\tC\n+chr1\t3211457\t3211458\tA/C\t2\t17,10\t0.8610,0.8576\tA\n+chr1\t3222692\t3222693\tA/G\t2\t14,10\t0.8694,0.8619\tG\n+chr1\t3281273\t3281274\tA/G\t2\t4,5\t0.8210,0.8427\tG\n+chr1\t3630475\t3630476\tC/T\t2\t12,12\t0.8589,0.8430\tC\n+chr1\t4599834\t4599835\tT\t1\t18\t0.8797\tC\n+chr1\t5287107\t5287108\tG\t1\t6\t0.8443\tT\n+chr1\t5307765\t5307766\tC/T\t2\t22,12\t0.8485,0.8505\tC\n+chr1\t5341621\t5341622\tA/C\t2\t5,3\t0.8741,0.9001\tA\n+chr1\t5478586\t5478587\tG\t1\t11\t0.8997\tA\n+chr1\t5680961\t5680962\tA/C\t2\t16,10\t0.8671,0.8135\tA\n+chr1\t5736051\t5736052\tA/G\t2\t57,34\t0.8655,0.8676\tG\n+chr1\t5831207\t5831208\tC/T\t2\t5,10\t0.8978,0.8575\tC\n+chr1\t6200884\t6200885\tC\t1\t13\t0.8346\tA\n+chr1\t6307254\t6307255\tC\t1\t11\t0.8723\tT\n+chr1\t6621228\t6621229\tG/T\t2\t10,11\t0.8405,0.8336\tG\n+chr1\t7094424\t7094425\tA\t1\t4\t0.8578\tG\n+chr1\t7392593\t7392594\tC/T\t2\t15,25\t0.8653,0.8644\tC\n+chr1\t7500089\t7500090\tG\t1\t13\t0.8508\tC\n+chr1\t7911301\t7911302\tC/T\t2\t8,9\t0.9113,0.8736\tC\n+chr1\t7954646\t7954647\tC/T\t2\t12,8\t0.8365,0.8490\tC\n+chr1\t8027386\t8027387\tC/T\t2\t15,21\t0.8669,0.8528\tC\n+chr1\t8549302\t8549303\tC/T\t2\t8,7\t0.8606,0.8633\tC\n+chr1\t8932179\t8932180\tG/T\t2\t13,12\t0.8136,0.8594\tG\n+chr1\t9016389\t9016390\tT\t1\t14\t0.8590\tC\n+chr1\t9494443\t9494444\tT\t1\t6\t0.8567\tC\n+chr1\t10615774\t10615775\tC\t1\t15\t0.8684\tT\n+chr1\t10622975\t10622976\tT\t1\t4\t0.8001\tC\n+chr1\t10782207\t10782208\tT\t1\t3\t0.8402\tC\n+chr1\t12238146\t12238147\tA/G\t2\t4,9\t0.8560,0.8449\tG\n+chr1\t12429037\t12429038\tA/G\t2\t8,6\t0.8692,0.8863\tA\n+chr1\t12669902\t12669903\tC/T\t2\t10,24\t0.8495,0.8653\tT\n+chr1\t13189040\t13189041\tA/T\t2\t8,4\t0.8592,0.8868\tA\n+chr1\t14208570\t14208571\tA/G\t2\t10,8\t0.8646,0.8495\tG\n+chr1\t14501554\t14501555\tA/G\t2\t17,26\t0.8646,0.8663\tG\n+chr1\t15063865\t15063866\tA/G\t2\t9,18\t0.8571,0.82
72\tG\n+chr1\t15286095\t15286096\tG\t1\t21\t0.8600\tA\n+chr1\t15441925\t15441926\tT\t1\t17\t0.8402\tC\n+chr1\t15712178\t15712179\tA\t1\t14\t0.8694\tG\n+chr1\t16207687\t16207688\tG\t1\t16\t0.8512\tC\n+chr1\t16314956\t16314957\tA\t1\t31\t0.8639\tT\n+chr1\t16476823\t16476824\tC/T\t2\t5,8\t0.8822,0.8898\tC\n+chr1\t16730581\t16730582\tG\t1\t9\t0.8232\tC\n+chr1\t17053721\t17053722\tT\t1\t66\t0.8916\tG\n+chr1\t17053765\t17053766\tA/G\t2\t19,11\t0.8246,0.8436\tG\n+chr1\t17408311\t17408312\tC\t1\t22\t0.8786\tG\n+chr1\t17498098\t17498099\tC\t1\t3\t0.7863\tT\n+chr1\t17715847\t17715848\tA\t1\t11\t0.8438\tG\n+chr1\t18857430\t18857431\tC/T\t2\t17,14\t0.8834,0.8586\tC\n+chr1\t19092742\t19092743\tG/T\t2\t16,10\t0.8682,0.8678\tG\n+chr1\t19525584\t19525585\tC\t1\t12\t0.8614\tT\n+chr1\t19921477\t19921478\tA/C\t2\t3,4\t0.8781,0.8884\tC\n+chr1\t20044146\t20044147\tC/T\t2\t5,6\t0.8571,0.8847\tC\n+chr1\t20269168\t20269169\tC/T\t2\t5,10\t0.8936,0.8839\tT\n+chr1\t20947809\t20947810\tT\t1\t8\t0.8416\tC\n+chr1\t21135182\t21135183\tA\t1\t13\t0.8384\tC\n+chr1\t21941643\t21941644\tT\t1\t12\t0.8389\tG\n+chr1\t22324976\t22324977\tC/T\t2\t9,6\t0.8524,0.8158\tC\n+chr1\t22708791\t22708792\tA/T\t2\t8,12\t0.8313,0.8557\tT\n+chr1\t22904350\t22904351\tA/G\t2\t10,4\t0.8451,0.8523\tA\n+chr1\t22928348\t22928349\tG\t1\t7\t0.8629\tA\n+chr1\t23039691\t23039692\tC\t1\t13\t0.8484\tG\n+chr1\t24392068\t24392069\tT\t1\t4\t0.8425\tC\n+chr1\t24851230\t24851231\tA/G\t2\t19,15\t0.8794,0.8807\tA\n+chr1\t25158007\t25158008\tA/G\t2\t7,12\t0.8350,0.8625\tA\n+chr1\t25250192\t25250193\tC/T\t2\t5,4\t0.8613,0.8827\tC\n+chr1\t25349613\t25349614\tG\t1\t6\t0.8963\tA\n+chr1\t25891773\t25891774\tC/T\t2\t9,7\t0.8398,0.8869\tC\n+chr1\t25989060\t25989061\tA/G\t2\t14,5\t0.8271,0.8143\tG\n+chr1\t25996483\t25996484\tA/G\t2\t25,17\t0.7803,0.7880\tA\n+chr1\t26172110\t26172111\tA/G\t2\t24,13\t0.8568,0.8740\tA\n+chr1\t26275739\t26275740\tC/T\t2\t10,16\t0.8335,0.8656\tT\n+chr1\t26436230\t26436231\tA/C\t2\t18,14\t0.8299,0.8245\tC\n+chr1\t27081
694\t27081695\tA/G\t2\t7,10\t0.8297,0.8288\tG\n+chr1\t27215078\t27215079\tC/T\t2\t9,5\t0.8749,0.8190\tT\n+chr1\t27527485\t27527486\tA/G\t2\t3,5\t0.8697,0.8867\tA\n+chr1\t28266702\t28266703\tC/T\t2\t4,5\t0.8513,0.8812\tC\n+chr1\t28273015\t28273016\tG\t1\t23\t0.7710\tT\n+chr1\t28548057\t28548058\tC\t1\t22\t0.8518\tG\n+chr1\t28817596\t28817597\tG\t1\t17\t0.8790\tA\n+chr1\t28833437\t28833438\tG/T\t2\t3,5\t0.8753,0.8702\tG\n+chr1\t28848587\t28848588\tG/T\t2\t11,17\t0.8629,0.8546\tG\n+chr1\t29140084\t29140085\tA/C\t2\t8,10\t0.8493,0.8580\tC\n+chr1\t29788803\t29788804\tC\t1\t3\t0.8525\tT\n+chr1\t30086425\t30086426\tA/G\t2\t15,14\t0.8973,0.8948\tA\n+chr1\t30214408\t30214409\tA\t1\t36\t0.8861\tG\n+'..b'+chrX\t111720465\t111720466\tA\t1\t14\t0.8430\tG\n+chrX\t112170053\t112170054\tG\t1\t3\t0.9248\tA\n+chrX\t112459169\t112459170\tA\t1\t19\t0.8654\tG\n+chrX\t113279524\t113279525\tT\t1\t20\t0.8246\tA\n+chrX\t113324217\t113324218\tC\t1\t9\t0.8407\tA\n+chrX\t114056797\t114056798\tC\t1\t11\t0.8067\tT\n+chrX\t115018183\t115018184\tG\t1\t13\t0.8356\tA\n+chrX\t115733503\t115733504\tT\t1\t7\t0.8198\tG\n+chrX\t116139363\t116139364\tG\t1\t7\t0.9105\tA\n+chrX\t116488679\t116488680\tG\t1\t14\t0.8564\tA\n+chrX\t117358634\t117358635\tA\t1\t8\t0.8613\tG\n+chrX\t118498954\t118498955\tC\t1\t4\t0.7954\tT\n+chrX\t118648794\t118648795\tT\t1\t14\t0.8734\tG\n+chrX\t118693641\t118693642\tT\t1\t6\t0.8303\tC\n+chrX\t118869589\t118869590\tG\t1\t8\t0.9096\tA\n+chrX\t120306948\t120306949\tG\t1\t12\t0.8546\tA\n+chrX\t120448877\t120448878\tA\t1\t25\t0.8910\tC\n+chrX\t120872985\t120872986\tT\t1\t8\t0.8337\tC\n+chrX\t121680281\t121680282\tC\t1\t12\t0.8836\tT\n+chrX\t122435686\t122435687\tT\t1\t12\t0.7835\tC\n+chrX\t123397991\t123397992\tA\t1\t4\t0.8356\tG\n+chrX\t123907161\t123907162\tC\t1\t7\t0.8753\tT\n+chrX\t124719518\t124719519\tC\t1\t14\t0.8631\tT\n+chrX\t124768222\t124768223\tT\t1\t5\t0.8839\tC\n+chrX\t125632363\t125632364\tC\t1\t10\t0.8800\tT\n+chrX\t126053340\t126053341\tA\t1\t10\t0.8392\tC\n+chrX\t12
6195634\t126195635\tA\t1\t7\t0.8718\tG\n+chrX\t126716246\t126716247\tC\t1\t6\t0.8763\tT\n+chrX\t127367145\t127367146\tA\t1\t17\t0.8456\tG\n+chrX\t127457582\t127457583\tG\t1\t8\t0.8587\tC\n+chrX\t128605962\t128605963\tT\t1\t20\t0.8591\tC\n+chrX\t129569189\t129569190\tC\t1\t17\t0.8282\tG\n+chrX\t129811001\t129811002\tG\t1\t20\t0.8687\tC\n+chrX\t130409883\t130409884\tT\t1\t17\t0.8126\tA\n+chrX\t132344700\t132344701\tC\t1\t3\t0.8623\tT\n+chrX\t132915772\t132915773\tA\t1\t15\t0.8894\tC\n+chrX\t133907790\t133907791\tA\t1\t12\t0.8533\tC\n+chrX\t134518384\t134518385\tC\t1\t9\t0.8196\tA\n+chrX\t134766489\t134766490\tA/G\t2\t15,9\t0.8509,0.8127\tA\n+chrX\t134985459\t134985460\tG\t1\t3\t0.8540\tA\n+chrX\t135190793\t135190794\tG\t1\t17\t0.8961\tA\n+chrX\t136655716\t136655717\tG\t1\t6\t0.8716\tT\n+chrX\t136733809\t136733810\tC\t1\t15\t0.8669\tT\n+chrX\t137469231\t137469232\tC\t1\t18\t0.8722\tT\n+chrX\t137852981\t137852982\tT\t1\t11\t0.8276\tG\n+chrX\t139003000\t139003001\tA\t1\t13\t0.8526\tG\n+chrX\t139110302\t139110303\tA\t1\t9\t0.8564\tG\n+chrX\t139529487\t139529488\tC\t1\t18\t0.8521\tT\n+chrX\t139531472\t139531473\tT\t1\t11\t0.8682\tC\n+chrX\t139615864\t139615865\tG\t1\t9\t0.8202\tA\n+chrX\t139702889\t139702890\tC\t1\t3\t0.8590\tT\n+chrX\t140485622\t140485623\tG\t1\t3\t0.8489\tC\n+chrX\t140511684\t140511685\tG\t1\t13\t0.8238\tA\n+chrX\t140743414\t140743415\tC\t1\t4\t0.8914\tT\n+chrX\t141082368\t141082369\tC\t1\t12\t0.8648\tT\n+chrX\t141153066\t141153067\tG\t1\t7\t0.8369\tC\n+chrX\t141213396\t141213397\tA\t1\t4\t0.8604\tC\n+chrX\t142194135\t142194136\tA\t1\t10\t0.8411\tG\n+chrX\t142652846\t142652847\tT\t1\t19\t0.8740\tC\n+chrX\t142688444\t142688445\tA\t1\t6\t0.8866\tC\n+chrX\t142756403\t142756404\tC\t1\t18\t0.8646\tT\n+chrX\t144873543\t144873544\tC\t1\t12\t0.8420\tT\n+chrX\t145351956\t145351957\tT\t1\t5\t0.8467\tC\n+chrX\t145881542\t145881543\tG\t1\t3\t0.8178\tT\n+chrX\t145924332\t145924333\tG\t1\t9\t0.8605\tA\n+chrX\t145935548\t145935549\tC\t1\t14\t0.7743\tA\n+chrX\t146079383
\t146079384\tC\t1\t3\t0.8542\tA\n+chrX\t146425710\t146425711\tG\t1\t9\t0.8032\tA\n+chrX\t146613499\t146613500\tC\t1\t11\t0.8771\tA\n+chrX\t147002991\t147002992\tG\t1\t4\t0.8704\tT\n+chrX\t149466820\t149466821\tT\t1\t15\t0.8550\tC\n+chrX\t149603761\t149603762\tA\t1\t3\t0.8279\tC\n+chrX\t150053854\t150053855\tC\t1\t11\t0.8358\tA\n+chrX\t150408360\t150408361\tT\t1\t11\t0.8165\tC\n+chrX\t150451895\t150451896\tA\t1\t23\t0.8775\tG\n+chrX\t150777182\t150777183\tT\t1\t16\t0.8623\tC\n+chrX\t150808405\t150808406\tT\t1\t15\t0.8428\tG\n+chrX\t150923430\t150923431\tG/T\t2\t3,3\t0.8295,0.8593\tT\n+chrX\t150989778\t150989779\tC\t1\t8\t0.8441\tG\n+chrX\t152584270\t152584271\tG\t1\t7\t0.8157\tA\n+chrX\t152845290\t152845291\tT\t1\t12\t0.8522\tC\n+chrX\t153055432\t153055433\tA\t1\t17\t0.8461\tG\n+chrX\t153090059\t153090060\tG\t1\t3\t0.8010\tA\n+chrY\t4223441\t4223442\tG\t1\t8\t0.7722\tA\n+chrY\t9939199\t9939200\tC/T\t2\t20,13\t0.8045,0.8191\tC\n+chrY\t9962146\t9962147\tG/T\t2\t3,5\t0.8341,0.8751\tT\n+chrY\t9991556\t9991557\tG/T\t2\t31,22\t0.8248,0.8247\tT\n+chrY\t10050627\t10050628\tC/T\t2\t4,5\t0.8047,0.8279\tT\n+chrY\t10068141\t10068142\tA/T\t2\t15,20\t0.7954,0.8232\tA\n+chrY\t13195465\t13195466\tG/T\t2\t8,8\t0.8124,0.8215\tG\n+chrY\t13295972\t13295973\tC/T\t2\t16,10\t0.8060,0.8173\tC\n+chrY\t13567253\t13567254\tA/C\t2\t5,6\t0.8285,0.8351\tA\n+chrY\t18619115\t18619116\tC\t1\t4\t0.8518\tA\n+chrY\t59026174\t59026175\tA/G\t2\t5,13\t0.8138,0.8404\tA\n' |