annotate alignTophat.pl @ 11:a9fcdf7991b9 draft

Uploaded
author romaingred
date Fri, 17 Nov 2017 03:41:10 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
1 #!/usr/bin/perl
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
2 use strict;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
3 use warnings;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
4 use Getopt::Long;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
5
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
6 my $max_procs = 8;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
7
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
8 my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont, $dirBed, $dirBedgraph, $outText, $outSimple, $random_w);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
9
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
10 GetOptions (
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
11 "direction:s" => \$directional,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
12 "random_w:s" => \$random_w,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
13 "find:s" => \$findSens,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
14 "fastq=s" => \$fastq,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
15 "fastq_n=s" => \$fastqN,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
16 "ma=i" => \$ma,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
17 "mis=i" => \$mis,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
18 "contaminants=s" => \$contaminants,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
19 "anno=s" => \$annotation,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
20 "build_contaminant" => \$build_contaminant,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
21 "ref=s" => \$ref,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
22 "build_ref" => \$build_ref,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
23 "dirbamCont=s" => \$dirBam_cont,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
24 "dirbam=s" => \$dirBam,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
25 "dirbedgraph=s" => \$dirBedgraph,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
26 "text=s" => \$outText,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
27 );
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
28
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
29 $directional = lc($directional);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
30 $findSens = lc($findSens);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
31 $random_w = lc($random_w);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
32
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
33 if ($build_ref)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
34 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
35 `(bowtie2-build $ref $ref) 2> /dev/null `;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
36 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
37
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
38 if ($build_contaminant)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
39 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
40 `(bowtie2-build $contaminants $contaminants) 2> /dev/null `;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
41 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
42
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
43 #"outsimple=s" => \$outSimple
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
44 #"annotation=s" => \$annotation,
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
45
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
46 #$annotation = "/data/oldgalaxy/annotation/ara/TAIR10_all_sorted.bed";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
47 #$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
48
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
49 # mapping against contaminants databases in: contaminants, fastq, # of mismatches, output_dir out: tophat dir
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
50 tophat2($fastq, $contaminants, $mis, 1, 'dir_contaminant');
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
51 `bamToFastq -i dir_contaminant/unmapped.bam -fq clean.fastq`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
52
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
53 # mapping against contaminants databases in: $ref, fastq, # of mismatches, output_dir out: tophat dir
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
54 tophat2('clean.fastq', $ref, $mis, $max_procs, 'dir_reference');
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
55
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
56 my $repTot = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
57 if ($ma == 0)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
58 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
59 open (my $rT, 'dir_reference/align_summary.txt') || die "cannot open dir_reference/align_summary.txt $!";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
60 my @repT = <$rT>;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
61 $repTot = $1 if $repT[2] =~ /\s+Mapped\s+:\s+(\d+).*/;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
62 close $rT;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
63 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
64 else
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
65 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
66 $repTot = $ma;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
67 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
68
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
69 # Creation of bedgraphs
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
70 my $dir_bedgraph = $fastqN.'_bedgraph/';
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
71 findUnique('dir_reference/accepted_hits.bam', $dir_bedgraph, $repTot);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
72
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
73 # count in: unique_mapped.bam out: raw_count_simple antisens_count
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
74 if ($random_w eq 'true')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
75 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
76 reference('rand_sorted.bam', $repTot);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
77 `mv rand_sorted.bam dir_reference/random_sorted.bam`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
78 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
79 else
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
80 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
81 reference('ac_sorted.bam', $repTot);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
82 `mv ac_sorted.bam dir_reference/unique_sorted.bam`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
83 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
84
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
85 zip('dir_reference', $dirBam);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
86 zip('dir_contaminant', $dirBam_cont);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
87 zip( $dir_bedgraph , $dirBedgraph);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
88
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
89 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
90 # inputs:
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
91 # fastq $_[0]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
92 # bowtie2_index $_[1]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
93 # mismatches # $_[2]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
94 # output directory $_[3]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
95 # max_procs $_[4]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
96 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
97 sub tophat2
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
98 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
99 my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
100 $out_repertory = $out_repertory.'/' unless $out_repertory =~ /\/$/;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
101 # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
102 `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2>&1`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
103 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
104 ############################################################################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
105
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
106 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
107 # inputs:
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
108 # bam $_[0]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
109 # output directory $_[1]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
110 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
111 sub findUnique
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
112 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
113 my ($bam, $out_bedgraph, $sc) = @_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
114
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
115 mkdir $out_bedgraph;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
116
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
117 my $bedg_uni = $out_bedgraph.'/'.$fastqN.'_unique.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
118 my $bedg_all_multi = $out_bedgraph.'/'.$fastqN.'_all_multi.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
119 my $bedg_random = $out_bedgraph.'/'.$fastqN.'_random_multi.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
120
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
121 my $bedg_uni_p = $out_bedgraph.'/'.$fastqN.'_unique_plusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
122 my $bedg_all_multi_p = $out_bedgraph.'/'.$fastqN.'_all_multi_plusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
123 my $bedg_random_p = $out_bedgraph.'/'.$fastqN.'_random_multi_plusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
124
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
125 my $bedg_uni_m = $out_bedgraph.'/'.$fastqN.'_unique_minusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
126 my $bedg_all_multi_m = $out_bedgraph.'/'.$fastqN.'_all_multi_minusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
127 my $bedg_random_m = $out_bedgraph.'/'.$fastqN.'_random_multi_minusStrand.bedgraph' ;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
128
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
129 my $sca = 1000000/$sc;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
130
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
131 #unique file
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
132 `samtools view -H $bam > ac.sam 2> /dev/null && samtools view -@ $max_procs $bam 2> /dev/null |grep NH:i:1\$ >> ac.sam && samtools view -@ $max_procs -Shb ac.sam > ac.bam 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
133 `samtools sort -@ $max_procs ac.bam -o ac_sorted.bam 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
134 `genomeCoverageBed -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
135 if ($directional eq 'true')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
136 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
137 `genomeCoverageBed -strand + -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_p 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
138 `genomeCoverageBed -strand - -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_m 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
139 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
140 #all multi
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
141 `samtools sort -@ $max_procs $bam -o accepted_hits_sorted.bam 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
142 `genomeCoverageBed -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
143 if ($directional eq 'true')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
144 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
145 `genomeCoverageBed -strand + -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_p 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
146 `genomeCoverageBed -strand - -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_m 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
147 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
148 #random multi
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
149 `samtools view -@ $max_procs -hb -F256 $bam > rand.bam 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
150 `samtools sort -@ $max_procs rand.bam -o rand_sorted.bam 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
151 `genomeCoverageBed -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
152 if ($directional eq 'true')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
153 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
154 `genomeCoverageBed -strand + -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_p 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
155 `genomeCoverageBed -strand - -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_m 2> /dev/null`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
156 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
157 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
158 ############################################################################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
159
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
160 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
161 # inputs:
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
162 # bam $_[0]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
163 # output directory $_[1]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
164 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
165 sub reference
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
166 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
167 my $bam = shift;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
168 my $mapnum = shift;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
169
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
170 my @tabRef = ();
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
171
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
172 open (my $anno, $annotation) || die "cannot open annotation file!\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
173 while(<$anno>)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
174 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
175 chomp $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
176 my @split = split /\t/, $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
177 my $size = $split[2] - $split[1];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
178 push @tabRef, [$_,0,0,$size];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
179 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
180 close $anno;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
181
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
182 `bedtools bamtobed -i $bam > ac.bed`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
183
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
184 # my $mapnum = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
185 # open (my $bed, 'ac.bed') || die "cannot open ac.bed\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
186 # while(<$bed>){ $mapnum++; }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
187 # close $bed;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
188
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
189 if ($directional eq 'true')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
190 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
191 #Same strand
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
192 `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
193 #Different strand
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
194 `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
195
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
196 open (my $strand, 'intersectStranded.bed') || die "cannot open intersectStranded.bed\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
197 my $i = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
198 my $firstLine = <$strand>; chomp $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
199 my @split = split /\t/, $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
200 my $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
201 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
202 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
203 $tabRef[$i]->[1] = 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
204 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
205 while(<$strand>)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
206 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
207 chomp $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
208 @split = split /\t/, $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
209 if ($split[3] ne $prevId)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
210 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
211 $i++;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
212 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
213 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
214 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
215 $tabRef[$i]->[1] += 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
216 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
217 $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
218 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
219 close $strand;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
220
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
221 open ($strand, 'intersectOppositeStrand.bed') || die "cannot open intersectOppositeStrand.bed\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
222 $i = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
223 $firstLine = <$strand>; chomp $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
224 @split = split /\t/, $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
225 $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
226 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
227 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
228 $tabRef[$i]->[2] = 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
229 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
230 while(<$strand>)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
231 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
232 chomp $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
233 @split = split /\t/, $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
234 if ($split[3] ne $prevId)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
235 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
236 $i++;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
237 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
238 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
239 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
240 $tabRef[$i]->[2] += 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
241 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
242 $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
243 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
244 close $strand;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
245 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
246
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
247 else
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
248 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
249 `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
250
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
251 open (my $strand, 'intersect.bed') || die "cannot open intersect.bed\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
252 my $i = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
253 my $firstLine = <$strand>; chomp $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
254 my @split = split /\t/, $firstLine;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
255 my $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
256 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
257 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
258 $tabRef[$i]->[1] = 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
259 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
260 while(<$strand>)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
261 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
262 chomp $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
263 @split = split /\t/, $_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
264 if ($split[3] ne $prevId)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
265 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
266 $i++;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
267 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
268 if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
269 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
270 $tabRef[$i]->[1] += 1;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
271 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
272 $prevId = $split[3];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
273 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
274 close $strand;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
275 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
276
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
277 my $totplus = 0; my $totminus = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
278 foreach my $outPut (@tabRef)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
279 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
280 $totplus += $outPut->[1];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
281 $totminus += $outPut->[2];
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
282 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
283
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
284 open (my $out, ">".$outText) || die "cannot open raw_count.txt\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
285 print $out "Chr\tstart\tend\tID\tscore\tstrand\ttype\tD1\tD2\tD3\treads\trpm\treads on opposite strand\trpm\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
286 my ($Sr, $Rr) = (0,0);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
287 foreach my $outPut (@tabRef)
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
288 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
289 if ($totplus > $totminus || $findSens eq 'false')
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
290 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
291 $Sr = rpm($outPut->[1],$mapnum);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
292 $Rr = rpm($outPut->[2],$mapnum);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
293 print $out $outPut->[0]."\t".$outPut->[1]."\t".$Sr."\t".$outPut->[2]."\t".$Rr."\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
294 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
295 else
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
296 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
297 $Rr = rpm($outPut->[1],$mapnum);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
298 $Sr = rpm($outPut->[2],$mapnum);
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
299 print $out $outPut->[0]."\t".$outPut->[2]."\t".$Sr."\t".$outPut->[1]."\t".$Rr."\n";
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
300 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
301 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
302 close $out;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
303 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
304 ############################################################################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
305
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
306 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
307 # inputs:
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
308 # bam $_[0]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
309 # output directory $_[1]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
310 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
311 sub rpm
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
312 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
313 my $raw_count = shift;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
314 my $mapped_number = shift;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
315 my $RPM = 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
316 $RPM = ($raw_count * 1000000) / ($mapped_number) if $mapped_number != 0;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
317 return $RPM;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
318 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
319 ############################################################################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
320
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
321 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
322 # inputs:
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
323 # bam $_[0]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
324 # output directory $_[1]
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
325 #################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
326 sub zip
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
327 {
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
328 my ($in_dir, $out_tar) = @_;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
329 `tar -cvf tmp.tar $in_dir/`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
330 `mv tmp.tar $out_tar`;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
331 rmdir $in_dir;
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
332 }
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
333 ############################################################################################
a9fcdf7991b9 Uploaded
romaingred
parents:
diff changeset
334