annotate pgSnp2gd_snp.pl @ 3:edf12470a1a6 default tip

Bugfix from Belinda, in vcf2pgSnp.pl
author Cathy Riemer <cathy+hg@bx.psu.edu>
date Thu, 19 Mar 2015 12:06:34 -0400
parents 35c20b109be5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
1 #!/usr/bin/perl -w
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
2 use strict;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
4 #convert from pgSnp file to snp table (Webb format?)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
5
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
6 #snp table format:
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
7 #1. chr
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
8 #2. position (0 based)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
9 #3. ref allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
10 #4. second allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
11 #5. overall quality
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
12 #foreach individual (6-9, 10-13, ...)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
13 #a. count of allele in 3
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
14 #b. count of allele in 4
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
15 #c. genotype call (-1, or count of ref allele)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
16 #d. quality of genotype call (quality of non-ref allele from masterVar)
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
17
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
18 if (!@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
19 print "usage: pgSnp2gd_snp.pl file.pgSnp[.gz|.bz2] [-tab=snpTable.txt -addColsOnly -build=hg19 -name=na -ref=#1based -chr=#1based ] > newSnpTable.txt\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
20 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
21 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
22
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
23 my $in = shift @ARGV;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
24 my $tab;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
25 my $tabOnly;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
26 my $build;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
27 my $name;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
28 my $ref;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
29 my $binChr = 1; #position of chrom column, indicates if bin is added
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
30 foreach (@ARGV) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
31 if (/-tab=(.*)/) { $tab = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
32 elsif (/-addColsOnly/) { $tabOnly = 1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
33 elsif (/-build=(.*)/) { $build = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
34 elsif (/-name=(.*)/) { $name = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
35 elsif (/-ref=(\d+)/) { $ref = $1 - 1; } #go to index
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
36 elsif (/-chr=(\d+)/) { $binChr = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
37 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
38
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
39 if ($binChr == 2 && $ref) { $ref--; } #shift over by 1, we will delete bin
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
40 if ((!$tab or !$tabOnly) && !$ref) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
41 print "Error the reference allele must be in a column in the file if not just adding to a previous SNP table.\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
42 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
43 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
44
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
45 #WARNING loads snp table in memory, this could take > 1G ram
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
46 my %old;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
47 my $colcnt = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
48 my @head;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
49 if ($tab) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
50 open(FH, $tab) or die "Couldn't open $tab, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
51 while (<FH>) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
52 chomp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
53 if (/^#/) { push(@head, $_); next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
54 my @f = split(/\t/);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
55 $old{"$f[0]:$f[1]"} = join("\t", @f);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
56 $colcnt = scalar @f;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
57 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
58 close FH or die "Couldn't close $tab, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
59 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
60
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
61 if ($in =~ /.gz$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
62 open(FH, "zcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
63 }elsif ($in =~ /.bz2$/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
64 open(FH, "bzcat $in |") or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
65 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
66 open(FH, $in) or die "Couldn't open $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
67 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
68 prepHeader();
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
69 if (@head) { #keep old header, add new?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
70 print join("\n", @head), "\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
71 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
72 while (<FH>) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
73 chomp;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
74 if (/^#/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
75 if (/^\s*$/) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
76 my @f = split(/\t/);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
77 if ($binChr == 2) { #must have a bin column prepended on the beginning
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
78 shift @f; #delete it
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
79 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
80 if (!$f[3]) { next; } #WHAT? most likely still zipped?
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
81 if ($f[4] > 2) { next; } #can only do cases of 2 alleles
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
82 if ($f[2] == $f[1] or $f[2] - $f[1] != 1) { next; } #no indels
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
83 if ($f[3] =~ /-/) { next; } #no indels
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
84 #if creating a new table need the reference allele in a column
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
85 if (%old && $old{"$f[0]:$f[1]"}) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
86 my @o = split(/\t/, $old{"$f[0]:$f[1]"});
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
87 my $freq = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
88 my $freq2 = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
89 my $sc;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
90 my $g = 1; #genotype == ref allele count
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
91 if ($f[4] == 1) { #should be homozygous
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
92 if ($f[3] eq $o[2]) { $g = 2; $freq = $f[5]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
93 elsif ($f[3] eq $o[3]) { $g = 0; $freq2 = $f[5]; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
94 else { next; } #doesn't match either allele, skip
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
95 $sc = $f[6];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
96 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
97 my $a = 0; #index of a alleles, freq, scores
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
98 my $b = 1; #same for b
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
99 my @all = split(/\//, $f[3]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
100 if ($o[2] ne $all[0] && $o[2] ne $all[1]) { next; } #must match one
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
101 if ($o[3] ne $all[0] && $o[3] ne $all[1]) { next; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
102 if ($o[2] eq $all[1]) { #switch indexes
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
103 $a = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
104 $b = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
105 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
106 my @fr = split(/,/, $f[5]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
107 $freq = $fr[$a];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
108 $freq2 = $fr[$b];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
109 my @s = split(/,/, $f[6]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
110 $sc = $s[$b];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
111 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
112 #print old
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
113 print $old{"$f[0]:$f[1]"};
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
114 #add new columns
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
115 print "\t$freq\t$freq2\t$g\t$sc\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
116 $old{"$f[0]:$f[1]"} = '';
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
117 }elsif (!$tabOnly) { #new table, or don't have this SNP
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
118 #need reference allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
119 if ($f[3] !~ /$f[$ref]/ && $f[4] == 2) { next; } #no reference allele
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
120 my $freq = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
121 my $freq2 = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
122 my $sc;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
123 my $g = 1; #genotype == ref allele count
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
124 my $alt;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
125 if ($f[4] == 1) { #should be homozygous
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
126 if ($f[3] eq $f[$ref]) { $g = 2; $freq = $f[5]; $alt = 'N'; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
127 else { $g = 0; $freq2 = $f[5]; $alt = $f[3]; } #matches alternate
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
128 $sc = $f[6];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
129 }else {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
130 my $a = 0; #index of a alleles, freq, scores
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
131 my $b = 1; #same for b
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
132 my @all = split(/\//, $f[3]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
133 if ($f[$ref] ne $all[0] && $f[$ref] ne $all[1]) { next; } #must match one
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
134 if ($f[$ref] eq $all[1]) { #switch indexes
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
135 $a = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
136 $b = 0;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
137 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
138 my @fr = split(/,/, $f[5]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
139 $freq = $fr[$a];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
140 $freq2 = $fr[$b];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
141 my @s = split(/,/, $f[6]);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
142 $sc = $s[$b];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
143 $alt = $all[$b];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
144 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
145 #print initial columns
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
146 print "$f[0]\t$f[1]\t$f[$ref]\t$alt\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
147 #pad for other individuals if needed
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
148 my $i = 5;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
149 while ($i < $colcnt) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
150 print "\t-1\t-1\t-1\t-1";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
151 $i += 4;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
152 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
153 #add new columns
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
154 print "\t$freq\t$freq2\t$g\t$sc\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
155 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
156 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
157 close FH or die "Couldn't close $in, $!\n";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
158
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
159 #if adding to a snp table, now we need to finish those not in the latest set
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
160 foreach my $k (keys %old) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
161 if ($old{$k} ne '') { #not printed yet
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
162 print $old{$k}, "\t-1\t-1\t-1\t-1\n"; #plus blank for this one
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
163 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
164 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
165
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
166 exit;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
167
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
168 #parse old header and add or create new
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
169 sub prepHeader {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
170 if (!$build) { $build = 'hg19'; } #set default
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
171 my @cnames;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
172 my @ind;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
173 my $n;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
174 if (@head) { #parse previous header
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
175 my $h = join("", @head); #may split between lines
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
176 if ($h =~ /"column_names":\[(.*?)\]/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
177 my @t = split(/,/, $1);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
178 foreach (@t) { s/"//g; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
179 @cnames = @t;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
180 $n = $cnames[$#cnames];
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
181 $n =~ s/Q//;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
182 $n++;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
183 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
184 if ($h =~ /"dbkey":"(.*?)"/) { $build = $1; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
185 if ($h =~ /"individuals":\[(.*)\]/) {
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
186 my $t = $1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
187 $t =~ s/\]\].*/]/; #remove if there is more categories
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
188 @ind = split(/,/, $t);
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
189 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
190 }else { #start new header
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
191 @cnames = ("chr", "pos", "A", "B", "Q");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
192 $n = 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
193 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
194 #add current
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
195 if (!$name) { $name= 'na'; }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
196 my $stcol = $colcnt + 1;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
197 if ($stcol == 1) { $stcol = 6; } #move past initial columns
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
198 push(@ind, "[\"$name\",$stcol]");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
199 push(@cnames, "${n}A", "${n}B", "${n}G", "${n}Q");
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
200 #reassign head
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
201 undef @head;
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
202 foreach (@cnames) { $_ = "\"$_\""; } #quote name
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
203 $head[0] = "#{\"column_names\":[" . join(",", @cnames) . "],";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
204 $head[1] = "#\"individuals\":[" . join(",", @ind) . "],";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
205 $head[2] = "#\"dbkey\":\"$build\",\"pos\":2,\"rPos\":2,\"ref\":1,\"scaffold\":1,\"species\":\"$build\"}";
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
206 }
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
207 ####End
35c20b109be5 Retrying upload with "bare" tarball (i.e. one without a top containing directory).
cathy
parents:
diff changeset
208