annotate jsm_to_vcf.pl @ 2:26953f1c8af2 draft default tip

Uploaded
author fcaramia
date Thu, 20 Jun 2013 00:53:38 -0400
parents a1034918ab9b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a1034918ab9b Uploaded
fcaramia
parents:
diff changeset
# jsm_to_vcf.pl - convert a tab-separated jsm output file into a minimal
# VCF 4.1 file.
#
# Usage: jsm_to_vcf.pl <jsm file> <vcf file>
#
# Input : leading lines starting with "##" are skipped; the first line after
#         them is taken as the column header; every following line is a record.
# Output: one VCF record per input record. Input columns 0-3 are written as
#         CHROM, POS, REF and ALT; ID and QUAL are "."; FILTER is always
#         "PASS"; the remaining columns are joined as name=value pairs in INFO.
# Dies if the arguments are missing or either file cannot be opened/written.
use strict;
use warnings;

# Both the input and the output path are required.
die qq(
Bad number of inputs
Usage: $0 <jsm file> <vcf file>

) if @ARGV < 2;

my $input = $ARGV[0];
my $vcf   = $ARGV[1];

# Convert output to VCF format.
# Three-argument open keeps the mode separate from the (user supplied) path.
open(my $jsm_fh, '<', $input) or die "Couldn't open jsm file $input: $!\n";
open(my $vcf_fh, '>', $vcf)   or die "Couldn't create vcf file $vcf: $!\n";

# Print the vcf format we are using.
print {$vcf_fh} "##fileformat=VCFv4.1\n";

# Grab the header, which is the first line after the comment lines that start
# with ##. $header is undef if the file ends before a header line is found.
my $header = <$jsm_fh>;
while (defined $header && $header =~ /^##/) {
    $header = <$jsm_fh>;
}
# The header is not chomped: its last column name is never used below.
my @head = defined $header ? split(/\t/, $header) : ();

print "Converting jsm output to vcf\n";

# vcf header is
# #CHROM POS ID REF ALT QUAL FILTER INFO
print {$vcf_fh} "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";

# For each line in jsm transform to vcf; any columns not in vcf are
# concatenated together and placed in the info column.
while (my $line = <$jsm_fh>) {
    chomp $line;
    my @fields = split(/\t/, $line);

    # CHROM, POS, REF and ALT are mandatory. Blank lines are skipped silently,
    # other short lines are reported so that no broken record is written.
    if (@fields < 4) {
        warn "Skipping line $. of $input: expected at least 4 tab-separated columns\n"
            if $line =~ /\S/;
        next;
    }

    # Create info column, e.g.
    # tumor_name=MH208_TUMOR;normal_name=MH208_LIVER;...;n_alt_sum=702
    # NOTE(review): the last input column is deliberately left out of INFO,
    # as in the original loop bound - confirm that this is intended.
    my @info;
    for my $index (4 .. $#fields - 1) {
        # A record may have more columns than the header names.
        my $key = defined $head[$index] ? $head[$index] : '';
        push @info, "$key=$fields[$index]";
    }
    # "." is the VCF missing-value marker for an empty INFO column.
    my $infofield = @info ? join(';', @info) : '.';

    # Print the line. FILTER is a constant, so the record itself is never
    # modified (a 4-column record keeps its ALT value).
    print {$vcf_fh} "$fields[0]\t$fields[1]\t.\t$fields[2]\t$fields[3]\t.\tPASS\t$infofield\n";
}

close($jsm_fh);
# Buffered write errors only surface when the output handle is closed.
close($vcf_fh) or die "Couldn't finish writing vcf file $vcf: $!\n";