Mercurial > repos > romaingred > gred_rnaseq
view alignTophat.pl @ 13:cc676ee7fe40 draft
Uploaded
author | romaingred |
---|---|
date | Tue, 21 Nov 2017 11:23:34 -0500 |
parents | a9fcdf7991b9 |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use warnings; use Getopt::Long; my $max_procs = 8; my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont, $dirBed, $dirBedgraph, $outText, $outSimple, $random_w); GetOptions ( "direction:s" => \$directional, "random_w:s" => \$random_w, "find:s" => \$findSens, "fastq=s" => \$fastq, "fastq_n=s" => \$fastqN, "ma=i" => \$ma, "mis=i" => \$mis, "contaminants=s" => \$contaminants, "anno=s" => \$annotation, "build_contaminant" => \$build_contaminant, "ref=s" => \$ref, "build_ref" => \$build_ref, "dirbamCont=s" => \$dirBam_cont, "dirbam=s" => \$dirBam, "dirbedgraph=s" => \$dirBedgraph, "text=s" => \$outText, ); $directional = lc($directional); $findSens = lc($findSens); $random_w = lc($random_w); if ($build_ref) { `(bowtie2-build $ref $ref) 2> /dev/null `; } if ($build_contaminant) { `(bowtie2-build $contaminants $contaminants) 2> /dev/null `; } #"outsimple=s" => \$outSimple #"annotation=s" => \$annotation, #$annotation = "/data/oldgalaxy/annotation/ara/TAIR10_all_sorted.bed"; #$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed"; # mapping against contaminants databases in: contaminants, fastq, # of mismatches, output_dir out: tophat dir tophat2($fastq, $contaminants, $mis, 1, 'dir_contaminant'); `bamToFastq -i dir_contaminant/unmapped.bam -fq clean.fastq`; # mapping against contaminants databases in: $ref, fastq, # of mismatches, output_dir out: tophat dir tophat2('clean.fastq', $ref, $mis, $max_procs, 'dir_reference'); my $repTot = 0; if ($ma == 0) { open (my $rT, 'dir_reference/align_summary.txt') || die "cannot open dir_reference/align_summary.txt $!"; my @repT = <$rT>; $repTot = $1 if $repT[2] =~ /\s+Mapped\s+:\s+(\d+).*/; close $rT; } else { $repTot = $ma; } # Creation of bedgraphs my $dir_bedgraph = $fastqN.'_bedgraph/'; findUnique('dir_reference/accepted_hits.bam', $dir_bedgraph, $repTot); # count in: unique_mapped.bam out: raw_count_simple antisens_count if ($random_w eq 'true') { reference('rand_sorted.bam', $repTot); `mv rand_sorted.bam dir_reference/random_sorted.bam`; } else { reference('ac_sorted.bam', $repTot); `mv ac_sorted.bam dir_reference/unique_sorted.bam`; } zip('dir_reference', $dirBam); zip('dir_contaminant', $dirBam_cont); zip( $dir_bedgraph , $dirBedgraph); ################################# # inputs: # fastq $_[0] # bowtie2_index $_[1] # mismatches # $_[2] # output directory $_[3] # max_procs $_[4] ################################# sub tophat2 { my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_; $out_repertory = $out_repertory.'/' unless $out_repertory =~ /\/$/; # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\n"; `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2>&1`; } ############################################################################################ ################################# # inputs: # bam $_[0] # output directory $_[1] ################################# sub findUnique { my ($bam, $out_bedgraph, $sc) = @_; mkdir $out_bedgraph; my $bedg_uni = $out_bedgraph.'/'.$fastqN.'_unique.bedgraph' ; my $bedg_all_multi = $out_bedgraph.'/'.$fastqN.'_all_multi.bedgraph' ; my $bedg_random = $out_bedgraph.'/'.$fastqN.'_random_multi.bedgraph' ; my $bedg_uni_p = $out_bedgraph.'/'.$fastqN.'_unique_plusStrand.bedgraph' ; my $bedg_all_multi_p = $out_bedgraph.'/'.$fastqN.'_all_multi_plusStrand.bedgraph' ; my $bedg_random_p = $out_bedgraph.'/'.$fastqN.'_random_multi_plusStrand.bedgraph' ; my $bedg_uni_m = $out_bedgraph.'/'.$fastqN.'_unique_minusStrand.bedgraph' ; my $bedg_all_multi_m = $out_bedgraph.'/'.$fastqN.'_all_multi_minusStrand.bedgraph' ; my $bedg_random_m = $out_bedgraph.'/'.$fastqN.'_random_multi_minusStrand.bedgraph' ; my $sca = 1000000/$sc; #unique file `samtools view -H $bam > ac.sam 2> /dev/null && samtools view -@ $max_procs $bam 2> /dev/null |grep NH:i:1\$ >> ac.sam && samtools view -@ $max_procs -Shb ac.sam > ac.bam 2> /dev/null`; `samtools sort -@ $max_procs ac.bam -o ac_sorted.bam 2> /dev/null`; `genomeCoverageBed -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni 2> /dev/null`; if ($directional eq 'true') { `genomeCoverageBed -strand + -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_p 2> /dev/null`; `genomeCoverageBed -strand - -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_m 2> /dev/null`; } #all multi `samtools sort -@ $max_procs $bam -o accepted_hits_sorted.bam 2> /dev/null`; `genomeCoverageBed -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi 2> /dev/null`; if ($directional eq 'true') { `genomeCoverageBed -strand + -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_p 2> /dev/null`; `genomeCoverageBed -strand - -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_m 2> /dev/null`; } #random multi `samtools view -@ $max_procs -hb -F256 $bam > rand.bam 2> /dev/null`; `samtools sort -@ $max_procs rand.bam -o rand_sorted.bam 2> /dev/null`; `genomeCoverageBed -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random 2> /dev/null`; if ($directional eq 'true') { `genomeCoverageBed -strand + -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_p 2> /dev/null`; `genomeCoverageBed -strand - -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_m 2> /dev/null`; } } ############################################################################################ ################################# # inputs: # bam $_[0] # output directory $_[1] ################################# sub reference { my $bam = shift; my $mapnum = shift; my @tabRef = (); open (my $anno, $annotation) || die "cannot open annotation file!\n"; while(<$anno>) { chomp $_; my @split = split /\t/, $_; my $size = $split[2] - $split[1]; push @tabRef, [$_,0,0,$size]; } close $anno; `bedtools bamtobed -i $bam > ac.bed`; # my $mapnum = 0; # open (my $bed, 'ac.bed') || die "cannot open ac.bed\n"; # while(<$bed>){ $mapnum++; } # close $bed; if ($directional eq 'true') { #Same strand `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`; #Different strand `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`; open (my $strand, 'intersectStranded.bed') || die "cannot open intersectStranded.bed\n"; my $i = 0; my $firstLine = <$strand>; chomp $firstLine; my @split = split /\t/, $firstLine; my $prevId = $split[3]; if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[1] = 1; } while(<$strand>) { chomp $_; @split = split /\t/, $_; if ($split[3] ne $prevId) { $i++; } if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[1] += 1; } $prevId = $split[3]; } close $strand; open ($strand, 'intersectOppositeStrand.bed') || die "cannot open intersectOppositeStrand.bed\n"; $i = 0; $firstLine = <$strand>; chomp $firstLine; @split = split /\t/, $firstLine; $prevId = $split[3]; if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[2] = 1; } while(<$strand>) { chomp $_; @split = split /\t/, $_; if ($split[3] ne $prevId) { $i++; } if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[2] += 1; } $prevId = $split[3]; } close $strand; } else { `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`; open (my $strand, 'intersect.bed') || die "cannot open intersect.bed\n"; my $i = 0; my $firstLine = <$strand>; chomp $firstLine; my @split = split /\t/, $firstLine; my $prevId = $split[3]; if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[1] = 1; } while(<$strand>) { chomp $_; @split = split /\t/, $_; if ($split[3] ne $prevId) { $i++; } if ($split[16] != 0 && $split[12] - $split[11] == $split[16]) { $tabRef[$i]->[1] += 1; } $prevId = $split[3]; } close $strand; } my $totplus = 0; my $totminus = 0; foreach my $outPut (@tabRef) { $totplus += $outPut->[1]; $totminus += $outPut->[2]; } open (my $out, ">".$outText) || die "cannot open raw_count.txt\n"; print $out "Chr\tstart\tend\tID\tscore\tstrand\ttype\tD1\tD2\tD3\treads\trpm\treads on opposite strand\trpm\n"; my ($Sr, $Rr) = (0,0); foreach my $outPut (@tabRef) { if ($totplus > $totminus || $findSens eq 'false') { $Sr = rpm($outPut->[1],$mapnum); $Rr = rpm($outPut->[2],$mapnum); print $out $outPut->[0]."\t".$outPut->[1]."\t".$Sr."\t".$outPut->[2]."\t".$Rr."\n"; } else { $Rr = rpm($outPut->[1],$mapnum); $Sr = rpm($outPut->[2],$mapnum); print $out $outPut->[0]."\t".$outPut->[2]."\t".$Sr."\t".$outPut->[1]."\t".$Rr."\n"; } } close $out; } ############################################################################################ ################################# # inputs: # bam $_[0] # output directory $_[1] ################################# sub rpm { my $raw_count = shift; my $mapped_number = shift; my $RPM = 0; $RPM = ($raw_count * 1000000) / ($mapped_number) if $mapped_number != 0; return $RPM; } ############################################################################################ ################################# # inputs: # bam $_[0] # output directory $_[1] ################################# sub zip { my ($in_dir, $out_tar) = @_; `tar -cvf tmp.tar $in_dir/`; `mv tmp.tar $out_tar`; rmdir $in_dir; } ############################################################################################