Mercurial > repos > miller-lab > snp_analysis_conversion
annotate master2gd_snp.pl @ 3:edf12470a1a6 default tip
Bugfix from Belinda, in vcf2pgSnp.pl
author | Cathy Riemer <cathy+hg@bx.psu.edu> |
---|---|
date | Thu, 19 Mar 2015 12:06:34 -0400 |
parents | 35c20b109be5 |
children |
rev | line source |
---|---|
2
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
1 #!/usr/bin/perl -w |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
2 use strict; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
4 #convert from master variant file to snp table (Webb format?) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
5 #new format for version 2.0, also different format for cancer normal pairs |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# Zero-based indexes, into a tab-split masterVar row, of the two per-allele
# count columns. The defaults are for the 2.0 Cancer format; the main loop
# switches them to 16 and 17 when it meets a "#FORMAT_VERSION 1.x" header.
my ($aCnt1, $aCnt2) = (21, 22);
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
9 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
10 #snp table format: |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
11 #1. chr |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
12 #2. position (0 based) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
13 #3. ref allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
14 #4. second allele |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
15 #5. overall quality |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
16 #foreach individual (6-9, 10-13, ...) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
17 #a. count of allele in 3 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
18 #b. count of allele in 4 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
19 #c. genotype call (-1, or count of ref allele) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
20 #d. quality of genotype call (quality of non-ref allele from masterVar) |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
21 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# With no arguments at all, explain how to call the script and stop.
# The usage text goes to STDERR (with a non-zero exit status) because STDOUT
# carries the new snp table and is normally redirected into a file.
if (!@ARGV) {
    print STDERR "usage: master2gd_snp.pl masterVar.txt[.gz|.bz2] [-tab=snpTable.txt -addColsOnly -build=hg19 -name=na ] > newSnpTable.txt\n";
    exit 1;
}

# The first argument is the masterVar file; everything after it is an option.
my $in = shift @ARGV;
my $tab;        # -tab=FILE     existing snp table to extend
my $tabOnly;    # -addColsOnly  only add columns for SNPs already in -tab
my $build;      # -build=NAME   genome build (prepHeader defaults it to hg19)
my $name;       # -name=LABEL   individual's label (prepHeader defaults it to na)
foreach my $arg (@ARGV) {
    # Anchor each pattern at the start of the argument so an option name that
    # merely appears inside another option's value (e.g. -name=x-tab=y) is
    # not mistaken for that option. One or two leading dashes are accepted.
    if    ($arg =~ /^--?tab=(.*)/)    { $tab     = $1; }
    elsif ($arg =~ /^--?addColsOnly/) { $tabOnly = 1; }
    elsif ($arg =~ /^--?build=(.*)/)  { $build   = $1; }
    elsif ($arg =~ /^--?name=(.*)/)   { $name    = $1; }
}
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
38 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# Read the existing snp table (when -tab was given) so that its rows can be
# extended with the columns for the new individual.
#WARNING loads snp table in memory, this could take > 1G ram
my %old;           # "chr:pos" => tab-joined row text from the old table
my $colcnt = 0;    # number of columns in the old table's data rows (0 = none)
my @head;          # the old table's '#' header lines, in file order
if ($tab) {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode or a command, and the handle cannot be
    # confused with the FH handle used for the masterVar input.
    open(my $tabFh, '<', $tab) or die "Couldn't open $tab, $!\n";
    while (my $line = <$tabFh>) {
        chomp $line;
        if ($line =~ /^#/) { push(@head, $line); next; }
        # A blank line would otherwise be stored under the key ":" and reset
        # $colcnt to 0, which breaks the padding of new rows and the start
        # column written into the header.
        next if $line =~ /^\s*$/;
        my @f = split(/\t/, $line);
        $old{"$f[0]:$f[1]"} = join("\t", @f);
        $colcnt = scalar @f;
    }
    close $tabFh or die "Couldn't close $tab, $!\n";
}
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
54 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# Open the masterVar input on FH, the handle the main loop reads from.
# - The extension tests escape the dot, so only names that really end in
#   ".gz" / ".bz2" are decompressed (the old /.gz$/ also matched "foogz").
# - The decompressor is started with the list form of a pipe open, so the
#   file name is handed over as a single argument and never passes through a
#   shell: names with spaces or shell metacharacters work and cannot inject
#   commands.
# - Plain files use a three-argument open. "-" is still accepted as standard
#   input, which is what the former two-argument open did with that name.
if ($in =~ /\.gz$/) {
    open(FH, '-|', 'zcat', $in) or die "Couldn't open $in, $!\n";
}
elsif ($in =~ /\.bz2$/) {
    open(FH, '-|', 'bzcat', $in) or die "Couldn't open $in, $!\n";
}
elsif ($in eq '-') {
    open(FH, '<&', \*STDIN) or die "Couldn't open $in, $!\n";
}
else {
    open(FH, '<', $in) or die "Couldn't open $in, $!\n";
}

# prepHeader() rebuilds @head (old header plus the new individual, or a brand
# new header); print whatever it left there before any data rows.
prepHeader();
if (@head) { #keep old header, add new?
    print join("\n", @head), "\n";
}
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# Begin one output row for the SNP at $chr:$pos and tell the caller whether
# this individual's four columns should be appended to it.
#   - SNP already in the old table: print the stored row and blank the stored
#     text so the final pass knows it has been written.
#   - SNP not in the old table and -addColsOnly was given: print nothing and
#     return false (the caller skips the SNP).
#   - otherwise: print the five leading columns (chr, pos, ref allele, second
#     allele, -1 for overall quality), then one "-1 -1 -1 -1" group for each
#     individual already present in the old table.
# Both zygosity branches below previously carried their own copy of this.
my $startRow = sub {
    my ($chr, $pos, $refAllele, $altAllele) = @_;
    my $key = "$chr:$pos";
    if ($tab && exists $old{$key}) {
        print $old{$key};
        $old{$key} = '';    #unset so we know it is printed
        return 1;
    }
    return 0 if ($tab && $tabOnly);    #skip this one entirely
    print "$chr\t$pos\t$refAllele\t$altAllele\t-1";
    #new SNP pad for missing individuals ($colcnt is 0 without an old table)
    for (my $i = 5; $i < $colcnt; $i += 4) {
        print "\t-1\t-1\t-1\t-1";
    }
    return 1;
};

# Convert every 'snp' row of the masterVar input into one snp-table row.
while (my $line = <FH>) {
    chomp $line;
    #FORMAT_VERSION 2.0
    # A 1.x file keeps its allele counts in columns 16 and 17.
    if ($line =~ /^#FORMAT_VERSION\s+1\./) {
        $aCnt1 = 16;
        $aCnt2 = 17;
    }
    next if $line =~ /^#/;
    next if $line =~ /^>/;      #headers
    next if $line =~ /^\s*$/;
    my @f = split(/\t/, $line);
    next if !$f[6];                 #WHAT? most likely still zipped?
    next if $f[6] ne 'snp';         #table only has substitutions
    next if $f[5] eq 'het-alt';     #skip heterozygous with no ref match

    if ($f[5] =~ /(hom|het)/) { #zygosity #haploid chrX and chrY?
        my $ref = $f[7];    #reference allele (not called $a: that is sort's variable)
        my $a2;             #second (non-reference) allele
        my $alt;            #column (8 or 9) of the non-reference allele when heterozygous
        my $g = 1;          #genotype == ref allele count
        my $sc = -1;        #quality of the genotype call, -1 when unknown
        # Counts of the ref allele and of the second allele. They start at
        # -1, the table's marker for a missing value, so that a row lacking
        # the count columns no longer prints undefined values (warnings and
        # empty cells).
        my $freq = -1;
        my $freq2 = -1;

        if ($f[8] eq $f[9]) { #should be homozygous?
            $a2 = $f[8];
            $g = 0;
            if ($f[10] && $f[10] ne '') { $sc = $f[10]; }#is this the best one to use? or smallest?
        }
        elsif ($ref ne $f[8]) { $a2 = $f[8]; $alt = 8; }
        elsif ($ref ne $f[9]) { $a2 = $f[9]; $alt = 9; }

        if (defined $f[10] && defined $f[11] && $alt) { #VAF score in 2.0 format
            # columns 10/11 run parallel to the allele columns 8/9
            if ($f[$alt+2] && $f[$alt+2] ne '') { $sc = $f[$alt+2]; }
        }

        #version 1.12 columns 16 & 17, version 2.0 Cancer columns 21 & 22
        if (defined $f[$aCnt1] && defined $f[$aCnt2] && $alt) {
            if ($alt == 8) {
                # allele 1 is the variant, so allele 2 carries the ref count
                $freq = $f[$aCnt2];
                $freq2 = $f[$aCnt1];
            }
            elsif ($alt == 9) {
                $freq = $f[$aCnt1];
                $freq2 = $f[$aCnt2];
            }
        }
        elsif (defined $f[$aCnt1]) {
            $freq = 0;
            $freq2 = $f[$aCnt1];
        }

        #if starting a new table or new SNP in old table
        #add option to only build on current table?
        next if !$startRow->($f[2], $f[3], $ref, $a2);
        #add columns for individual
        print "\t$freq\t$freq2\t$g\t$sc\n";
    }
    elsif ($f[5] eq 'hap') {
        my $g = 0;
        my $freq = 0;
        my $freq2 = 0;
        # NOTE(review): the hom/het branch reads the score from column 10/11
        # and the counts from the $aCnt columns, whereas this branch puts
        # column 10 into the count and $aCnt1 into the score. That looks
        # swapped; behaviour is left unchanged here - confirm against the
        # masterVar format before altering it.
        if (defined $f[10]) { $freq2 = $f[10]; }
        my $sc = -1;
        if (defined $f[$aCnt1] && $f[$aCnt1] ne '') { $sc = $f[$aCnt1]; }
        if ($f[8]) {
            next if !$startRow->($f[2], $f[3], $f[7], $f[8]);
            #add columns for individual
            print "\t$freq\t$freq2\t$g\t$sc\n";
        }
    }
}
close FH or die "Couldn't close $in, $!\n";
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
166 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
# When extending an old snp table, rows whose SNP never showed up in the new
# input still hold their text in %old (rows that were written had it blanked).
# Print each of those with a "-1 -1 -1 -1" group for the new individual.
for my $row (values %old) {
    next if $row eq '';    # already printed by the main loop
    print $row, "\t-1\t-1\t-1\t-1\n";
}

exit;
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
175 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
176 #parse old header and add or create new |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
177 sub prepHeader { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
178 if (!$build) { $build = 'hg19'; } #set default |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
179 my @cnames; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
180 my @ind; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
181 my $n; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
182 if (@head) { #parse previous header |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
183 my $h = join("", @head); #may split between lines |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
184 if ($h =~ /"column_names":\[(.*?)\]/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
185 my @t = split(/,/, $1); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
186 foreach (@t) { s/"//g; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
187 @cnames = @t; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
188 $n = $cnames[$#cnames]; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
189 $n =~ s/Q//; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
190 $n++; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
191 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
192 if ($h =~ /"dbkey":"(.*?)"/) { $build = $1; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
193 if ($h =~ /"individuals":\[(.*)\]/) { |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
194 my $t = $1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
195 $t =~ s/\]\].*/]/; #remove if there is more categories |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
196 @ind = split(/,/, $t); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
197 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
198 }else { #start new header |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
199 @cnames = ("chr", "pos", "A", "B", "Q"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
200 $n = 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
201 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
202 #add current |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
203 if (!$name) { $name= 'na'; } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
204 my $stcol = $colcnt + 1; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
205 if ($stcol == 1) { $stcol = 6; } #move past initial columns |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
206 push(@ind, "[\"$name\",$stcol]"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
207 push(@cnames, "${n}A", "${n}B", "${n}G", "${n}Q"); |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
208 #reassign head |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
209 undef @head; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
210 foreach (@cnames) { $_ = "\"$_\""; } #quote name |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
211 $head[0] = "#{\"column_names\":[" . join(",", @cnames) . "],"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
212 $head[1] = "#\"individuals\":[" . join(",", @ind) . "],"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
213 $head[2] = "#\"dbkey\":\"$build\",\"pos\":2,\"rPos\":2,\"ref\":1,\"scaffold\":1,\"species\":\"$build\"}"; |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
214 } |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
215 ####End |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
216 |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
217 ##example header |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
218 #{"column_names":["chr","pos","A","B","Q","1A","1B","1G","1Q","2A","2B","2G","2Q","3A","3B","3G","3Q","4A","4B","4G","4Q","5A","5B","5G","5Q","6A","6B","6G","6Q","7A","7B","7G","7Q","8A","8B","8G", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
219 #"8Q","9A","9B","9G","9Q","10A","10B","10G","10Q"],"dbkey":"hg19","individuals":[["Boh_15M",6],["Boh_19M",10],["Paya_27F",14],["Paya_2F",18],["Paya_32F",22],["Ruil_2M",26],["Ruil_36M",30],["Ruil_3M", |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
220 #34],["Ruil_40",38],["Ruil_47F",42]],"pos":2,"rPos":2,"ref":1,"scaffold":1,"species":"hg19"} |
35c20b109be5
Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff
changeset
|
221 #chr1 10290 C T 46.4 0 2 0 7 1 2 0 4 3 2 1 22 0 0 -1 0 1 0 1 4 0 2 0 7 0 0 -1 0 2 3 1 14 0 1 0 4 1 1 1 6 |