comparison wig_extractor.pl @ 32:117639df067a draft

Uploaded
author eiriche
date Mon, 03 Dec 2012 03:10:23 -0500
parents
children
comparison
equal deleted inserted replaced
31:35cfb51eb545 32:117639df067a
1 #!/usr/bin/perl
2 use Getopt::Long;
3 use Cwd;
4
5 my $filename;
6 my $parent_dir = getcwd();
7
8 my %fhs;
9 my %bases_cov;
10 my %bases_meth;
11 my ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out) = process_commandline();
12
13 process_results_file();
14
15 sub process_commandline{
16 my $cov_cutoff=1;
17 my $cov_file=0;
18 my $meth_file=0;
19 my $context="cpg,chh,chg";
20 #my $chrom_out;
21
22 # Files to extract:
23 # c -> Coverage
24 # m -> Methylation
25 # b -> Coverage + Methylation
26
27 my $files_to_extract;
28 my $command_line = GetOptions ( 'cutoff=i' => \$cov_cutoff,
29 'e|extract=s' => \$files_to_extract,
30 'context=s' => \$context,
31 #'chrom=s' => \$chrom_out,
32 'cov_out=s' => \$cov_out,
33 'meth_out=s' => \$meth_out);
34
35
36 ### EXIT ON ERROR if there are errors with any of the supplied options
37 unless ($command_line){
38 die "Please respecify command line options\n";
39 }
40
41 ### no files provided
42 unless (@ARGV){
43 die "You need to provide a file to parse!\n";
44 }
45 $filename = $ARGV[0];
46
47 ### no files to extract specified
48 unless ($files_to_extract){
49 die "You need to specify the file you want to extract!\n";
50 }
51
52 if ($files_to_extract eq "c" or $files_to_extract eq "b"){
53 $cov_file=1;
54 }
55 if ($files_to_extract eq "m" or $files_to_extract eq "b"){
56 $meth_file=1;
57 }
58 return ($cov_cutoff,$cov_file, $meth_file, $context, $cov_out, $meth_out);
59 }
60
61 sub process_results_file{
62 %fhs = ();
63 #if (defined($chrom_out)){
64 # $chrom_out = "chr".$chrom_out;
65 # print "\n$chrom_out\n";
66 #}
67 warn "\nNow reading in input file $filename\n\n";
68 open (IN,$filename) or die "Can't open file $filename: $!\n";
69
70 my($dir, $output_filename, $extension) = $filename =~ m/(.*\/)(.*)(\..*)$/;
71
72 ### OPENING OUTPUT-FILEHANDLES
73 my $wig_cov = my $wig_meth = $output_filename;
74 if ($cov_file == 1){
75 #$wig_cov =~ s/^/Coverage_/;
76 #$wig_cov = $dir."/".$wig_cov.".wig";
77 open ($fhs{wig_cov},'>',$cov_out) or die "Failed to write to $cov_out $!\n";
78 printf {$fhs{wig_cov}} ('track name="'.$output_filename.'" description="coverage per base" visibility=3 type=wiggle_0 color=50,205,50'."\n");
79 }
80 if ($meth_file == 1){
81 $wig_meth =~ s/^/Methylation_/;
82 $wig_meth = $dir."/".$wig_meth.".wig";
83 open ($fhs{wig_meth},'>',$meth_out) or die "Failed to write to $meth_out $!\n";
84 printf {$fhs{wig_meth}} ('track name="'.$output_filename.'" description="percentage methylation" visibility=3 type=wiggle_0 color=50,205,50'."\n");
85 }
86
87
88 while (<IN>){
89 next if $. == 1;
90 my ($chrom,$start,$strand,$cont,$coverage,$percentage_meth) = (split("\t"))[0,1,2,3,4,6];
91 $chrom = "\L$chrom";
92 if ("\U$chrom" !~ /CHR.*/){
93 $chrom = "chr".$chrom;
94 }
95 next if $coverage < $cov_cutoff;
96 #print "\n$chrom:$chrom_out\n";
97 next if (defined($chrom_out) and ($chrom ne $chrom_out));
98 #print "\n2\n";
99 next if not ("\U$context" =~ $cont);
100 #print "\n3\n";
101
102 if (defined($last_chrom) and $last_chrom ne $chrom){
103 write_files($last_chrom);
104 }
105 if ($cov_file == 1){
106 $bases_cov{$start}=$strand.$coverage;
107 }
108
109 if ($meth_file == 1){
110 $bases_meth{$start}=$strand.$percentage_meth;
111 }
112
113 $last_chrom = $chrom;
114 }
115 write_files($last_chrom);
116 }
117
118
119
120 sub write_files(){
121 my ($chrom) = @_;
122 # modify chromosome name, if not starting with "chr.."
123
124
125 if ($cov_file == 1){
126 printf {$fhs{wig_cov}} ("variableStep chrom=".$chrom." span=1\n");
127 for my $k1 (sort { $a <=> $b } keys %bases_cov){
128 printf {$fhs{wig_cov}} ("%u\t%d\n",$k1, $bases_cov{$k1});
129 }
130 %bases_cov =();
131 }
132
133 if ($meth_file == 1){
134 printf {$fhs{wig_meth}} ("variableStep chrom=".$chrom." span=1\n");
135 for my $k1 (sort { $a <=> $b } keys %bases_meth){
136 printf {$fhs{wig_meth}} ("%u\t%.2f\n",$k1, $bases_meth{$k1});
137 }
138 %bases_meth =();
139 }
140 }
141
142
143