annotate rapsodyn/filtersam_mapped_and_unique.pl @ 7:3f7b0788a1c4 draft

Uploaded
author mcharles
date Tue, 07 Oct 2014 10:34:34 -0400
parents 442a7c88b886
children 0a6c1cfe4dc8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
#!/usr/bin/perl
#V1.0.1 added log, option parameters
#
# Filter a SAM file, keeping the header plus the alignments that are both
# uniquely mapped and properly paired, and write per-FLAG record counts
# (before and after filtering) to a log file.
#
# Usage:
#   filtersam_mapped_and_unique.pl -input_sam_file IN.sam -log_file LOG.txt
#                                  [-output_sam_file OUT.sam]
#
# Options:
#   -input_sam_file   SAM file to read (required).
#   -output_sam_file  Where to write the filtered SAM. When omitted the
#                     filtered SAM is written to STDOUT.
#   -log_file         Where to write the FLAG statistics (required).
#
# An alignment is kept when:
#   * its optional fields contain XT:A:U, X0:i:1 and X1:i:0 (BWA tags), and
#   * its FLAG is one of 83, 163, 147 or 99.
use strict;
use warnings;
use Getopt::Long;

my $input_sam_file;
my $output_sam_file;
my $log_file;

# FLAG value => number of alignment records seen / kept.
my %bitscore_all;
my %bitscore_selected;

# FLAG values accepted by the filter.
my %kept_flag = map { $_ => 1 } (83, 163, 147, 99);

GetOptions (
    "input_sam_file=s"  => \$input_sam_file,
    "output_sam_file=s" => \$output_sam_file,
    "log_file=s"        => \$log_file
) or die("Error in command line arguments\n");

# Fail early with a clear message instead of an "uninitialized value"
# warning followed by a failed open (possibly after processing everything).
defined $input_sam_file or die("Missing required option -input_sam_file\n");
defined $log_file       or die("Missing required option -log_file\n");

# Three-argument open: the file name can never be interpreted as a mode.
open(my $in, '<', $input_sam_file)
    or die("Can't open $input_sam_file: $!\n");

# Honour -output_sam_file; keep STDOUT as the destination when it is absent
# so that callers relying on shell redirection still work.
my $out;
if (defined $output_sam_file) {
    open($out, '>', $output_sam_file)
        or die("Can't open $output_sam_file: $!\n");
}
else {
    $out = \*STDOUT;
}

while (my $line = <$in>) {
    # Blank lines carry no record; skip them rather than counting an
    # undefined FLAG.
    next unless $line =~ /\S/;

    # Header conservation: every header line starts with '@' (a read name
    # may not), so all of them are passed through and none is counted as
    # an alignment.
    if ($line =~ /^\@/) {
        print {$out} $line;
        next;
    }

    # Only the second column (FLAG) is needed; limit the split accordingly.
    my (undef, $bit) = split(/\s+/, $line, 3);
    next unless defined $bit;

    $bitscore_all{$bit}++;

    # Optional tags are tab-separated fields; anchor on the field
    # boundaries so that e.g. X0:i:10 is not mistaken for X0:i:1 and a tag
    # ending an unterminated last line is still recognised.
    next unless $line =~ /\tXT:A:U(?=\s|\z)/
            and $line =~ /\tX0:i:1(?=\s|\z)/
            and $line =~ /\tX1:i:0(?=\s|\z)/;
    next unless exists $kept_flag{$bit};

    print {$out} $line;
    $bitscore_selected{$bit}++;
}

close($in);
# Buffered write errors only surface when the handle is closed.
close($out) or die("Error while writing filtered SAM output: $!\n");

open(my $log, '>', $log_file) or die("Can't open $log_file: $!\n");
print {$log} "\n####\t Sam filtering \n";
print_flag_table($log, "Before filtering", \%bitscore_all);
print_flag_table($log, "After filtering",  \%bitscore_selected);
close($log) or die("Error while writing $log_file: $!\n");

# Write one statistics section to the log: a title line, a row of FLAG
# values and a row with the matching record counts.
#   $fh       - output file handle
#   $title    - section title (printed after "## ")
#   $count_of - hash reference, FLAG value => record count
# FLAGs are ordered by decreasing count; ties are broken by FLAG so both
# rows always line up and the output is reproducible between runs.
sub print_flag_table {
    my ($fh, $title, $count_of) = @_;

    my @flags = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
                keys %{$count_of};

    print {$fh} "## $title\n";
    print {$fh} "bitscore\t:\t", join('', map { $_ . "\t*\t" } @flags);
    print {$fh} "\n number \t:\t",
                join('', map { $count_of->{$_} . "\t*\t" } @flags);
    print {$fh} "\n";
    return;
}