Previous changeset 10:3e1543335c06 (2017-11-17) Next changeset 12:928b9b0aa75b (2017-11-17) |
Commit message:
Uploaded |
added:
alignTophat.pl |
b |
diff -r 3e1543335c06 -r a9fcdf7991b9 alignTophat.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alignTophat.pl Fri Nov 17 03:41:10 2017 -0500 |
[ |
b'@@ -0,0 +1,334 @@\n+#!/usr/bin/perl\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+my $max_procs = 8;\n+\n+my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont, $dirBed, $dirBedgraph, $outText, $outSimple, $random_w);\n+\n+GetOptions (\n+"direction:s" => \\$directional,\n+"random_w:s" => \\$random_w,\n+"find:s" => \\$findSens,\n+"fastq=s" => \\$fastq,\n+"fastq_n=s" => \\$fastqN, \n+"ma=i" => \\$ma,\n+"mis=i" => \\$mis,\n+"contaminants=s" => \\$contaminants,\n+"anno=s" => \\$annotation,\n+"build_contaminant" => \\$build_contaminant,\n+"ref=s" => \\$ref,\n+"build_ref" => \\$build_ref,\n+"dirbamCont=s" => \\$dirBam_cont,\n+"dirbam=s" => \\$dirBam,\n+"dirbedgraph=s" => \\$dirBedgraph,\n+"text=s" => \\$outText,\n+);\n+\n+$directional = lc($directional);\n+$findSens = lc($findSens);\n+$random_w = lc($random_w);\n+\n+if ($build_ref)\n+{\n+ `(bowtie2-build $ref $ref) 2> /dev/null `;\n+}\n+\n+if ($build_contaminant)\n+{\n+ `(bowtie2-build $contaminants $contaminants) 2> /dev/null `;\n+}\n+\n+#"outsimple=s" => \\$outSimple\n+#"annotation=s" => \\$annotation,\n+\n+#$annotation = "/data/oldgalaxy/annotation/ara/TAIR10_all_sorted.bed";\n+#$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed";\n+\n+# mapping against contaminants databases in: contaminants, fastq, # of mismatches, output_dir out: tophat dir\n+tophat2($fastq, $contaminants, $mis, 1, \'dir_contaminant\');\n+`bamToFastq -i dir_contaminant/unmapped.bam -fq clean.fastq`;\n+\n+# mapping against contaminants databases in: $ref, fastq, # of mismatches, output_dir out: tophat dir\n+tophat2(\'clean.fastq\', $ref, $mis, $max_procs, \'dir_reference\');\n+\n+my $repTot = 0;\n+if ($ma == 0)\n+{\n+ open (my $rT, \'dir_reference/align_summary.txt\') || die "cannot open dir_reference/align_summary.txt $!";\n+ my @repT = <$rT>;\n+ $repTot = $1 if $repT[2] =~ /\\s+Mapped\\s+:\\s+(\\d+).*/;\n+ close $rT;\n+}\n+else\n+{\n+ $repTot = $ma;\n+}\n+\n+# Creation of bedgraphs\n+my $dir_bedgraph = $fastqN.\'_bedgraph/\';\n+findUnique(\'dir_reference/accepted_hits.bam\', $dir_bedgraph, $repTot);\n+\n+# count in: unique_mapped.bam out: raw_count_simple antisens_count\n+if ($random_w eq \'true\')\n+{\n+ reference(\'rand_sorted.bam\', $repTot);\n+ `mv rand_sorted.bam dir_reference/random_sorted.bam`;\n+}\n+else\n+{\n+ reference(\'ac_sorted.bam\', $repTot);\n+ `mv ac_sorted.bam dir_reference/unique_sorted.bam`;\n+}\n+\n+zip(\'dir_reference\', $dirBam);\n+zip(\'dir_contaminant\', $dirBam_cont);\n+zip( $dir_bedgraph , $dirBedgraph);\n+\n+#################################\n+# inputs:\n+# fastq $_[0]\n+# bowtie2_index $_[1]\n+# mismatches # $_[2]\n+# output directory $_[3]\n+# max_procs $_[4]\n+#################################\n+sub tophat2\n+{\n+ my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_;\n+ $out_repertory = $out_repertory.\'/\' unless $out_repertory =~ /\\/$/;\n+ # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\\n";\n+ `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2>&1`;\n+}\n+############################################################################################\n+\n+#################################\n+# inputs:\n+# bam $_[0]\n+# output directory $_[1]\n+#################################\n+sub findUnique\n+{\n+ my ($bam, $out_bedgraph, $sc) = @_;\n+ \n+ mkdir $out_bedgraph;\n+\n+ my $bedg_uni = $out_bedgraph.\'/\'.$fastqN.\'_unique.bedgraph\' ;\n+ my $bedg_all_multi = $out_bedgraph.\'/\'.$fastqN.\'_all_multi.bedgraph\' ;\n+ my $bedg_random = $out_bedgraph.\'/\'.$fastqN.\'_random_multi.bedgraph\' ;\n+\n+ my $bedg_uni_p = $out_bedgraph.\'/\'.$fastqN.\'_unique_plusStrand.bedgraph\' ;\n+ my $bedg_all_multi_p = $out_bedgraph.\'/\'.$fastqN.\'_all_multi_plusStrand.bedgraph\' ;\n+ my $bedg_random_p = $out_bedgraph.\'/\'.$fastqN.\'_random_multi_plusStrand.bedgraph\' ;\n+\n+ my $bedg_uni_m = $out_bedgraph.\'/\'.$fastqN.\'_unique_minusStrand.bedgraph\' ;\n+ my $bedg_all_multi_m = $out_bedgraph.\'/\'.$fastqN.\'_all_multi_minusStrand'..b'if ($directional eq \'true\')\n+ {\n+ #Same strand\n+ `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`;\n+ #Different strand\n+ `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`;\n+ \n+ open (my $strand, \'intersectStranded.bed\') || die "cannot open intersectStranded.bed\\n";\n+ my $i = 0;\n+ my $firstLine = <$strand>; chomp $firstLine;\n+ my @split = split /\\t/, $firstLine;\n+ my $prevId = $split[3];\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[1] = 1;\n+ }\n+ while(<$strand>)\n+ {\n+ chomp $_;\n+ @split = split /\\t/, $_;\n+ if ($split[3] ne $prevId)\n+ {\n+ $i++;\n+ }\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[1] += 1;\n+ }\n+ $prevId = $split[3];\n+ }\n+ close $strand;\n+ \n+ open ($strand, \'intersectOppositeStrand.bed\') || die "cannot open intersectOppositeStrand.bed\\n";\n+ $i = 0;\n+ $firstLine = <$strand>; chomp $firstLine;\n+ @split = split /\\t/, $firstLine;\n+ $prevId = $split[3];\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[2] = 1;\n+ }\n+ while(<$strand>)\n+ {\n+ chomp $_;\n+ @split = split /\\t/, $_;\n+ if ($split[3] ne $prevId)\n+ {\n+ $i++;\n+ }\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[2] += 1;\n+ }\n+ $prevId = $split[3];\n+ }\n+ close $strand;\n+ }\n+\n+ else\n+ {\n+ `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`;\n+\n+ open (my $strand, \'intersect.bed\') || die "cannot open intersect.bed\\n";\n+ my $i = 0;\n+ my $firstLine = <$strand>; chomp $firstLine;\n+ my @split = split /\\t/, $firstLine;\n+ my $prevId = $split[3];\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[1] = 1;\n+ }\n+ while(<$strand>)\n+ {\n+ chomp $_;\n+ @split = split /\\t/, $_;\n+ if ($split[3] ne $prevId)\n+ {\n+ $i++;\n+ }\n+ if ($split[16] != 0 && $split[12] - $split[11] == $split[16])\n+ {\n+ $tabRef[$i]->[1] += 1;\n+ }\n+ $prevId = $split[3];\n+ }\n+ close $strand; \n+ }\n+ \n+ my $totplus = 0; my $totminus = 0;\n+ foreach my $outPut (@tabRef)\n+ {\n+ $totplus += $outPut->[1];\n+ $totminus += $outPut->[2];\n+ }\n+ \n+ open (my $out, ">".$outText) || die "cannot open raw_count.txt\\n";\n+ print $out "Chr\\tstart\\tend\\tID\\tscore\\tstrand\\ttype\\tD1\\tD2\\tD3\\treads\\trpm\\treads on opposite strand\\trpm\\n";\n+ my ($Sr, $Rr) = (0,0);\n+ foreach my $outPut (@tabRef)\n+ {\n+ if ($totplus > $totminus || $findSens eq \'false\')\n+ {\n+ $Sr = rpm($outPut->[1],$mapnum);\n+ $Rr = rpm($outPut->[2],$mapnum);\n+ print $out $outPut->[0]."\\t".$outPut->[1]."\\t".$Sr."\\t".$outPut->[2]."\\t".$Rr."\\n";\n+ }\n+ else\n+ {\n+ $Rr = rpm($outPut->[1],$mapnum);\n+ $Sr = rpm($outPut->[2],$mapnum);\n+ print $out $outPut->[0]."\\t".$outPut->[2]."\\t".$Sr."\\t".$outPut->[1]."\\t".$Rr."\\n";\n+ }\n+ }\n+ close $out;\n+}\n+############################################################################################\n+\n+#################################\n+# inputs:\n+# bam $_[0]\n+# output directory $_[1]\n+#################################\n+sub rpm\n+{\n+ my $raw_count = shift;\n+ my $mapped_number = shift;\n+ my $RPM = 0;\n+ $RPM = ($raw_count * 1000000) / ($mapped_number) if $mapped_number != 0;\n+ return $RPM;\n+}\n+############################################################################################\n+\n+#################################\n+# inputs:\n+# bam $_[0]\n+# output directory $_[1]\n+#################################\n+sub zip\n+{\n+ my ($in_dir, $out_tar) = @_;\n+ `tar -cvf tmp.tar $in_dir/`;\n+ `mv tmp.tar $out_tar`;\n+ rmdir $in_dir;\n+}\n+############################################################################################\n+\n' |