changeset 7:5a33149326c0 draft

Deleted selected files
author romaingred
date Fri, 17 Nov 2017 03:34:28 -0500
parents 1b586f3740a1
children 3bb645f4c0d7
files alignTophat.pl
diffstat 1 files changed, 0 insertions(+), 334 deletions(-) [+]
line wrap: on
line diff
--- a/alignTophat.pl	Fri Nov 17 03:34:15 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,334 +0,0 @@
-#!/usr/bin/perl
-use strict;
-use warnings;
-use Getopt::Long;
-
-my $max_procs = 8;
-
-my ($ma, $fastq, $fastqN, $mis,$findSens, $contaminants, $directional, $build_contaminant, $ref, $build_ref, $annotation, $dirBam, $dirBam_cont,$dirBed, $dirBedgraph, $outText, $outSimple, $random_w);
-
-GetOptions (
-"direction:s" => \$directional,
-"random_w:s" => \$random_w,
-"find:s" => \$findSens,
-"fastq=s" => \$fastq,
-"fastq_n=s" => \$fastqN, 
-"ma=i" => \$ma,
-"mis=i" => \$mis,
-"contaminants=s" => \$contaminants,
-"anno=s" => \$annotation,
-"build_contaminant" => \$build_contaminant,
-"ref=s" => \$ref,
-"build_ref" => \$build_ref,
-"dirbamCont=s" => \$dirBam_cont,
-"dirbam=s" => \$dirBam,
-"dirbedgraph=s" => \$dirBedgraph,
-"text=s" => \$outText,
-);
-
-$directional = lc($directional);
-$findSens = lc($findSens);
-$random_w = lc($random_w);
-
-if ($build_ref)
-{
-  `(bowtie2-build $ref $ref) 2> /dev/null `;
-}
-
-if ($build_contaminant)
-{
-  `(bowtie2-build $contaminants $contaminants) 2> /dev/null `;
-}
-
-#"outsimple=s" => \$outSimple
-#"annotation=s" => \$annotation,
-
-#$annotation = "/data/visiteur/annotation/ara/TAIR10_all_sorted.bed";
-#$annotation = "/data/home/pogorelcnikr/SG/humanGenes.bed";
-
-# mapping against contaminants databases  in: contaminants, fastq, # of mismatches, output_dir out: tophat dir
-tophat2($fastq, $contaminants, $mis, 1, 'dir_contaminant');
-`bamToFastq  -i dir_contaminant/unmapped.bam -fq clean.fastq`;
-
-# mapping against contaminants databases  in: $ref, fastq, # of mismatches, output_dir out: tophat dir
-tophat2('clean.fastq', $ref, $mis, $max_procs, 'dir_reference');
-
-my $repTot = 0;
-if ($ma == 0)
-{
-  open (my $rT,  'dir_reference/align_summary.txt');
-  my @repT = <$rT>;
-  $repTot = $1 if $repT[2] =~ /\s+Mapped\s+:\s+(\d+).*/;
-  close $rT;
-}
-else
-{
-  $repTot = $ma;
-}
-
-# Creation of bedgraphs
-my $dir_bedgraph = $fastqN.'_bedgraph/';
-findUnique('dir_reference/accepted_hits.bam', $dir_bedgraph, $repTot);
-
-# count in: unique_mapped.bam out: raw_count_simple antisens_count
-if ($random_w eq 'true')
-{
-  reference('rand_sorted.bam', $repTot);
-  `mv rand_sorted.bam dir_reference/random_sorted.bam`;
-}
-else
-{
-  reference('ac_sorted.bam', $repTot);
-  `mv ac_sorted.bam dir_reference/unique_sorted.bam`;
-}
-
-zip('dir_reference', $dirBam);
-zip('dir_contaminant', $dirBam_cont);
-zip( $dir_bedgraph , $dirBedgraph);
-
-#################################
-# inputs:
-# fastq $_[0]
-# bowtie2_index $_[1]
-# mismatches # $_[2]
-# output directory $_[3]
-# max_procs $_[4]
-#################################
-sub tophat2
-{
-  my ($fastq, $index, $mismtaches, $proc, $out_repertory) = @_;
-  $out_repertory = $out_repertory.'/' unless $out_repertory =~ /\/$/;
-  # print STDERR "/data/visiteur/tophat-2.0.12.Linux_x86_64/tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq\n";
-  `tophat2 -p $proc -N $mismtaches --b2-very-sensitive -o $out_repertory $index $fastq 2> /dev/stdin`;
-}
-############################################################################################
-
-#################################
-# inputs:
-# bam $_[0]
-# output directory $_[1]
-#################################
-sub findUnique
-{
-  my ($bam, $out_bedgraph, $sc) = @_;
-  
-  mkdir $out_bedgraph;
-
-  my $bedg_uni =  $out_bedgraph.'/'.$fastqN.'_unique.bedgraph' ;
-  my $bedg_all_multi = $out_bedgraph.'/'.$fastqN.'_all_multi.bedgraph' ;
-  my $bedg_random = $out_bedgraph.'/'.$fastqN.'_random_multi.bedgraph' ;
-
-  my $bedg_uni_p =  $out_bedgraph.'/'.$fastqN.'_unique_plusStrand.bedgraph' ;
-  my $bedg_all_multi_p = $out_bedgraph.'/'.$fastqN.'_all_multi_plusStrand.bedgraph' ;
-  my $bedg_random_p = $out_bedgraph.'/'.$fastqN.'_random_multi_plusStrand.bedgraph' ;
-
-  my $bedg_uni_m =  $out_bedgraph.'/'.$fastqN.'_unique_minusStrand.bedgraph' ;
-  my $bedg_all_multi_m = $out_bedgraph.'/'.$fastqN.'_all_multi_minusStrand.bedgraph' ;
-  my $bedg_random_m = $out_bedgraph.'/'.$fastqN.'_random_multi_minusStrand.bedgraph' ;
-  
-  my $sca = 1000000/$sc;
-
-  #unique file
-  `samtools view -H $bam > ac.sam 2> /dev/null && samtools view -@ $max_procs $bam 2> /dev/null |grep NH:i:1\$ >>  ac.sam && samtools view -@ $max_procs -Shb ac.sam > ac.bam 2> /dev/null`;
-  `samtools sort -@ $max_procs ac.bam -o ac_sorted.bam 2> /dev/null`;
-  `genomeCoverageBed -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni 2> /dev/null`;
-  if ($directional eq 'true')
-  {
-    `genomeCoverageBed -strand + -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_p 2> /dev/null`;
-    `genomeCoverageBed -strand - -scale $sca -split -bg -ibam ac_sorted.bam > $bedg_uni_m 2> /dev/null`;
-  }
-  #all multi
-  `samtools sort -@ $max_procs $bam -o accepted_hits_sorted.bam 2> /dev/null`;
-  `genomeCoverageBed -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi 2> /dev/null`;
-  if ($directional eq 'true')
-  {
-    `genomeCoverageBed -strand + -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_p 2> /dev/null`;
-    `genomeCoverageBed -strand - -scale $sca -split -bg -ibam accepted_hits_sorted.bam > $bedg_all_multi_m 2> /dev/null`;
-  }
-  #random multi
-  `samtools view -@ $max_procs -hb -F256 $bam > rand.bam 2> /dev/null`;
-  `samtools sort -@ $max_procs rand.bam -o rand_sorted.bam 2> /dev/null`;
-  `genomeCoverageBed -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random 2> /dev/null`;  
-  if ($directional eq 'true')
-  {
-    `genomeCoverageBed -strand + -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_p 2> /dev/null`;
-    `genomeCoverageBed -strand - -scale $sca -split -bg -ibam rand_sorted.bam > $bedg_random_m 2> /dev/null`;
-  }
-}
-############################################################################################
-
-#################################
-# inputs:
-# bam $_[0]
-# output directory $_[1]
-#################################
-sub reference
-{
-  my $bam = shift;
-  my $mapnum = shift;  
-
-  my @tabRef = ();
-  
-  open (my $anno, $annotation) || die "cannot open annotation file!\n";
-  while(<$anno>)
-  {
-    chomp $_;
-    my @split = split /\t/, $_;
-    my $size = $split[2] - $split[1];
-    push @tabRef, [$_,0,0,$size];
-  }
-  close $anno;
-  
-  `bedtools bamtobed -i $bam > ac.bed`;
-  
- # my $mapnum = 0;
- # open (my $bed, 'ac.bed') || die "cannot open ac.bed\n";
- # while(<$bed>){ $mapnum++; }
- # close $bed;
-  
-  if ($directional eq 'true')
-  {
-    #Same strand
-    `bedtools intersect -s -wao -b ac.bed -a $annotation> intersectStranded.bed`;
-    #Different strand
-    `bedtools intersect -S -wao -b ac.bed -a $annotation> intersectOppositeStrand.bed`;
-  
-    open (my $strand, 'intersectStranded.bed') || die "cannot open intersectStranded.bed\n";
-    my $i = 0;
-    my $firstLine = <$strand>; chomp $firstLine;
-    my @split = split /\t/, $firstLine;
-    my $prevId = $split[3];
-    if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-    {
-      $tabRef[$i]->[1] = 1;
-    }
-    while(<$strand>)
-    {
-      chomp $_;
-      @split = split /\t/, $_;
-      if ($split[3] ne $prevId)
-      {
-        $i++;
-      }
-      if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-      {
-        $tabRef[$i]->[1] += 1;
-      }
-      $prevId = $split[3];
-    }
-    close $strand;
-  
-    open ($strand, 'intersectOppositeStrand.bed') || die "cannot open intersectOppositeStrand.bed\n";
-    $i = 0;
-    $firstLine = <$strand>; chomp $firstLine;
-    @split = split /\t/, $firstLine;
-    $prevId = $split[3];
-    if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-    {
-      $tabRef[$i]->[2] = 1;
-    }
-    while(<$strand>)
-    {
-      chomp $_;
-      @split = split /\t/, $_;
-      if ($split[3] ne $prevId)
-      {
-        $i++;
-      }
-      if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-      {
-        $tabRef[$i]->[2] += 1;
-      }
-      $prevId = $split[3];
-    }
-    close $strand;
-  }
-
-  else
-  {
-    `bedtools intersect -wao -b ac.bed -a $annotation> intersect.bed`;
-
-    open (my $strand, 'intersect.bed') || die "cannot open intersect.bed\n";
-    my $i = 0;
-    my $firstLine = <$strand>; chomp $firstLine;
-    my @split = split /\t/, $firstLine;
-    my $prevId = $split[3];
-    if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-    {
-      $tabRef[$i]->[1] = 1;
-    }
-    while(<$strand>)
-    {
-      chomp $_;
-      @split = split /\t/, $_;
-      if ($split[3] ne $prevId)
-      {
-        $i++;
-      }
-      if ($split[16] != 0 && $split[12] - $split[11] == $split[16])
-      {
-        $tabRef[$i]->[1] += 1;
-      }
-      $prevId = $split[3];
-    }
-    close $strand; 
-  }
-  
-  my $totplus = 0; my $totminus = 0;
-  foreach my $outPut (@tabRef)
-  {
-    $totplus += $outPut->[1];
-    $totminus += $outPut->[2];
-  }
-    
-  open (my $out, ">".$outText) || die "cannot open raw_count.txt\n";
-  print $out "Chr\tstart\tend\tID\tscore\tstrand\ttype\tD1\tD2\tD3\treads\trpm\treads on opposite strand\trpm\n";
-  my ($Sr, $Rr) = (0,0);
-  foreach my $outPut (@tabRef)
-  {
-    if ($totplus > $totminus || $findSens eq 'false')
-    {
-      $Sr = rpm($outPut->[1],$mapnum);
-      $Rr = rpm($outPut->[2],$mapnum);
-      print $out $outPut->[0]."\t".$outPut->[1]."\t".$Sr."\t".$outPut->[2]."\t".$Rr."\n";
-    }
-    else
-    {
-      $Rr = rpm($outPut->[1],$mapnum);
-      $Sr = rpm($outPut->[2],$mapnum);
-      print $out $outPut->[0]."\t".$outPut->[2]."\t".$Sr."\t".$outPut->[1]."\t".$Rr."\n";
-    }
-  }
-  close $out;
-}
-############################################################################################
-
-#################################
-# inputs:
-# bam $_[0]
-# output directory $_[1]
-#################################
-sub rpm
-{
-  my $raw_count = shift;
-  my $mapped_number = shift;
-  my $RPM = 0;
-  $RPM = ($raw_count * 1000000) / ($mapped_number) if  $mapped_number != 0;
-  return $RPM;
-}
-############################################################################################
-
-#################################
-# inputs:
-# bam $_[0]
-# output directory $_[1]
-#################################
-sub zip
-{
-  my ($in_dir, $out_tar) = @_;
-  `tar -cvf tmp.tar $in_dir/`;
-  `mv tmp.tar $out_tar`;
-  rmdir $in_dir;
-}
-############################################################################################
-