annotate master2gd_snp.pl @ 3:edf12470a1a6 default tip

Bugfix from Belinda, in vcf2pgSnp.pl
author Cathy Riemer <cathy+hg@bx.psu.edu>
date Thu, 19 Mar 2015 12:06:34 -0400
parents 35c20b109be5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
1 #!/usr/bin/perl -w
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
2 use strict;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
4 #convert from master variant file to snp table (Webb format?)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
5 #new format for version 2.0, also different format for cancer normal pairs
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
6 #set columns for 2.0 version Cancer format
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
7 my $aCnt1 = 21;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
8 my $aCnt2 = 22;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
9
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
10 #snp table format:
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
11 #1. chr
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
12 #2. position (0 based)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
13 #3. ref allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
14 #4. second allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
15 #5. overall quality
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
16 #foreach individual (6-9, 10-13, ...)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
17 #a. count of allele in 3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
18 #b. count of allele in 4
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
19 #c. genotype call (-1, or count of ref allele)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
20 #d. quality of genotype call (quality of non-ref allele from masterVar)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
21
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
22 if (!@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
23 print "usage: master2gd_snp.pl masterVar.txt[.gz|.bz2] [-tab=snpTable.txt -addColsOnly -build=hg19 -name=na ] > newSnpTable.txt\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
24 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
25 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
26
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
27 my $in = shift @ARGV;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
28 my $tab;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
29 my $tabOnly;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
30 my $build;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
31 my $name;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
32 foreach (@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
33 if (/-tab=(.*)/) { $tab = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
34 elsif (/-addColsOnly/) { $tabOnly = 1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
35 elsif (/-build=(.*)/) { $build = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
36 elsif (/-name=(.*)/) { $name = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
37 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
38
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
39 #WARNING loads snp table in memory, this could take > 1G ram
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
40 my %old;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
41 my $colcnt = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
42 my @head;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
43 if ($tab) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
44 open(FH, $tab) or die "Couldn't open $tab, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
45 while (<FH>) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
46 chomp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
47 if (/^#/) { push(@head, $_); next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
48 my @f = split(/\t/);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
49 $old{"$f[0]:$f[1]"} = join("\t", @f);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
50 $colcnt = scalar @f;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
51 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
52 close FH or die "Couldn't close $tab, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
53 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
54
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
55 if ($in =~ /.gz$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
56 open(FH, "zcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
57 }elsif ($in =~ /.bz2$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
58 open(FH, "bzcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
59 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
60 open(FH, $in) or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
61 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
62 prepHeader();
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
63 if (@head) { #keep old header, add new?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
64 print join("\n", @head), "\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
65 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
66 while (<FH>) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
67 chomp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
68 #FORMAT_VERSION 2.0
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
69 if (/^#FORMAT_VERSION\s+1\./) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
70 $aCnt1 = 16;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
71 $aCnt2 = 17;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
72 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
73 if (/^#/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
74 if (/^>/) { next; } #headers
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
75 if (/^\s*$/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
76 my @f = split(/\t/);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
77 if (!$f[6]) { next; } #WHAT? most likely still zipped?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
78 if ($f[6] ne 'snp') { next; } #table only has substitutions
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
79 if ($f[5] eq 'het-alt') { next; } #skip heterozygous with no ref match
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
80 if ($f[5] =~ /(hom|het)/) { #zygosity #haploid chrX and chrY?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
81 my $a = $f[7]; #reference allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
82 my $a2;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
83 my $freq;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
84 my $freq2;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
85 my $sc = -1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
86 my $alt;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
87 my $g = 1; #genotype == ref allele count
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
88 if ($f[8] eq $f[9]) { #should be homozygous?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
89 $a2 = $f[8];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
90 $g = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
91 if ($f[10] && $f[10] ne '') { $sc = $f[10]; }#is this the best one to use? or smallest?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
92 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
93 if ($a ne $f[8]) { $a2 = $f[8]; $alt = 8; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
94 elsif ($a ne $f[9]) { $a2 = $f[9]; $alt = 9; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
95 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
96 if (defined $f[10] && defined $f[11] && $alt) { #VAF score in 2.0 format
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
97 if ($f[$alt+2] && $f[$alt+2] ne '') { $sc = $f[$alt+2]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
98 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
99 #version 1.12 columns 16 & 17, version 2.0 Cancer columns 21 & 22
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
100 if (defined $f[$aCnt1] && defined $f[$aCnt2] && $alt) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
101 if ($alt == 8) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
102 $freq = $f[$aCnt2];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
103 $freq2 = $f[$aCnt1];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
104 }elsif ($alt == 9) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
105 $freq = $f[$aCnt1];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
106 $freq2 = $f[$aCnt2];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
107 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
108 }elsif (defined $f[$aCnt1]) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
109 $freq = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
110 $freq2 = $f[$aCnt1];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
111 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
112 #if starting a new table or new SNP in old table
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
113 #add option to only build on current table?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
114 if (!$tab) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
115 print "$f[2]\t$f[3]\t$a\t$a2\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
116 }elsif (!$tabOnly && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
117 print "$f[2]\t$f[3]\t$a\t$a2\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
118 }elsif (exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
119 print $old{"$f[2]:$f[3]"};
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
120 $old{"$f[2]:$f[3]"} = ''; #unset so we know it is printed
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
121 }elsif ($tabOnly && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
122 next; #skip this one entirely
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
123 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
124 if ($colcnt && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
125 #new SNP pad for missing individuals
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
126 my $i = 5;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
127 while ($i < $colcnt) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
128 print "\t-1\t-1\t-1\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
129 $i += 4;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
130 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
131 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
132 #add columns for individual
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
133 print "\t$freq\t$freq2\t$g\t$sc\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
134 }elsif ($f[5] eq 'hap') {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
135 my $g = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
136 my $freq = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
137 my $freq2 = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
138 if (defined $f[10]) { $freq2 = $f[10]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
139 my $sc = -1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
140 if (defined $f[$aCnt1] && $f[$aCnt1] ne '') { $sc = $f[$aCnt1]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
141 if ($f[8]) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
142 if (!$tab) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
143 print "$f[2]\t$f[3]\t$f[7]\t$f[8]\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
144 }elsif (!$tabOnly && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
145 print "$f[2]\t$f[3]\t$f[7]\t$f[8]\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
146 }elsif (exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
147 print $old{"$f[2]:$f[3]"};
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
148 $old{"$f[2]:$f[3]"} = ''; #unset so we know it is printed
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
149 }elsif ($tabOnly && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
150 next; #skip this one entirely
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
151 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
152 if ($colcnt && !exists $old{"$f[2]:$f[3]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
153 #new SNP pad for missing individuals
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
154 my $i = 5;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
155 while ($i < $colcnt) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
156 print "\t-1\t-1\t-1\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
157 $i += 4;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
158 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
159 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
160 #add columns for individual
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
161 print "\t$freq\t$freq2\t$g\t$sc\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
162 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
163 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
164 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
165 close FH or die "Couldn't close $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
166
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
167 #if adding to a snp table, now we need to finish those not in the latest set
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
168 foreach my $k (keys %old) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
169 if ($old{$k} ne '') { #not printed yet
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
170 print $old{$k}, "\t-1\t-1\t-1\t-1\n"; #plus blank for this one
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
171 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
172 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
173
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
174 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
175
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
176 #parse old header and add or create new
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
177 sub prepHeader {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
178 if (!$build) { $build = 'hg19'; } #set default
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
179 my @cnames;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
180 my @ind;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
181 my $n;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
182 if (@head) { #parse previous header
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
183 my $h = join("", @head); #may split between lines
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
184 if ($h =~ /"column_names":\[(.*?)\]/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
185 my @t = split(/,/, $1);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
186 foreach (@t) { s/"//g; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
187 @cnames = @t;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
188 $n = $cnames[$#cnames];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
189 $n =~ s/Q//;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
190 $n++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
191 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
192 if ($h =~ /"dbkey":"(.*?)"/) { $build = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
193 if ($h =~ /"individuals":\[(.*)\]/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
194 my $t = $1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
195 $t =~ s/\]\].*/]/; #remove if there is more categories
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
196 @ind = split(/,/, $t);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
197 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
198 }else { #start new header
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
199 @cnames = ("chr", "pos", "A", "B", "Q");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
200 $n = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
201 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
202 #add current
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
203 if (!$name) { $name= 'na'; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
204 my $stcol = $colcnt + 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
205 if ($stcol == 1) { $stcol = 6; } #move past initial columns
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
206 push(@ind, "[\"$name\",$stcol]");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
207 push(@cnames, "${n}A", "${n}B", "${n}G", "${n}Q");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
208 #reassign head
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
209 undef @head;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
210 foreach (@cnames) { $_ = "\"$_\""; } #quote name
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
211 $head[0] = "#{\"column_names\":[" . join(",", @cnames) . "],";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
212 $head[1] = "#\"individuals\":[" . join(",", @ind) . "],";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
213 $head[2] = "#\"dbkey\":\"$build\",\"pos\":2,\"rPos\":2,\"ref\":1,\"scaffold\":1,\"species\":\"$build\"}";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
214 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
215 ####End
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
216
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
217 ##example header
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
218 #{"column_names":["chr","pos","A","B","Q","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","7A","7B","7G","7Q","8A","8B","8G",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
219 #"8Q","9A","9B","9G","9Q","10A","10B","10G","10Q"],"dbkey":"hg19","individuals":[["Boh_15M",6],["Boh_19M",10],["Paya_27F",14],["Paya_2F",18],["Paya_32F",22],["Ruil_2M",26],["Ruil_36M",30],["Ruil_3M",
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
220 #34],["Ruil_40",38],["Ruil_47F",42]],"pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"}
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
221 #chr1 10290 C T 46.4 0 2 0 7 1 2 0 4 3 2 1 22 0 0 -1 0 1 0 1 4 0 2 0 7 0 0 -1 0 2 3 1 14 0 1 0 4 1 1 1 6