Mercurial > repos > stheil > readpercontig_blat
view perl/scripts/readPerContig.pl @ 2:ea81b455dbf6 draft default tip
Uploaded
author | stheil |
---|---|
date | Thu, 15 Oct 2015 10:12:03 -0400 |
parents | 3203097d0a70 |
children |
line wrap: on
line source
#!/usr/bin/perl use strict; use Data::Dumper; use Logger::Logger; use File::Basename; use Getopt::Long; use Cwd 'abs_path' ; use Tools::Fasta ; use Tools::Fastq ; my $ref=''; my @p1=''; my @p2=''; my @read=''; my $identity = 95; my $coverage = 60; my $verbosity=1; my $format; my $stat; my $blatOutput=''; my $statOutput=''; GetOptions( "r=s" => \$ref, "id=i" => \$identity, "cov=i" => \$coverage, "b|blat" => \$blatOutput, "o|output=s" => \$statOutput, "f=s" => \$format, "s=s" => \$stat, "v|verbosity=i" => \$verbosity ); Logger::Logger->changeMode($verbosity); &main; sub main { my $self = {}; bless $self; $self->_setOptions(); $self->_checkFormat($format); $self->{ref_tool} = Tools::Fasta->new(file => $self->{reference}); $self->_parseBlat(); $self->_print($self->{_statistics}, $self->{statOutput}); } sub _parseBlat { my ($self) = @_; open(PSL,$self->{blatOutput}) || $logger->logdie('Cant read psl file ' . $self->{blatOutput}); while(<PSL>){ chomp; my @line = split(/\t/,$_); my $tSize = $line[14]; my $tStart = $line[15]; my $tEnd = $line[16]; my $coverage = (($tEnd-$tStart)/$tSize) * 100; if($coverage >= $self->{_covThreshold}){ $self->{readNumber}->{$line[9]}++; $self->{totalReadNumber}++; $self->{size}->{$line[9]} = $line[10]; $self->{reads_size_sum}->{$line[9]} += $tSize; } else{ next; } } close PSL; } sub _print { my ($self, $mode, $statOutput) = @_; $logger->info('Printing results ...'); if(!defined $mode){ $mode = 'nb_reads'; } if(!defined $statOutput){ $statOutput = $self->{reference} . '.rn'; } open(OUT,">$statOutput"); foreach my $ref (sort(keys(%{$self->{ref_tool}->{index}}))){ my $value = 0; if(defined($self->{readNumber}->{$ref})){ if($mode eq 'nb_reads'){ $value = $self->{readNumber}->{$ref}; } elsif($mode eq 'rpkm'){ $value = $self->{readNumber}->{$ref} / ($self->{size}->{$ref}/1000 * $self->{totalReadNumber}/1000000); } elsif($mode eq 'mean_coverage'){ $value = $self->{reads_size_sum}->{$ref} / $self->{size}->{$ref}; } } print OUT $ref . "\t" . $value . "\n"; } close OUT; } sub _setOptions { my ($self) = @_; if(-e $ref){ $self->{reference} = abs_path($ref); } else{ $logger->error('You must provide a reference.'); &help; } if($identity >= 0 && $identity <= 100){ $self->{_idThreshold} = $identity; } else{ $logger->logdie('ERROR: identity must be between 0 and 100.'); } if($coverage >=0 && $coverage <= 100){ $self->{_covThreshold} = $coverage; } else{ $logger->logdie('ERROR: coverage must be between 0 and 100.'); } if($stat =~ /nb_reads|rpkm|mean_coverage/){ $self->{_statistics} = $stat; } else{ $logdie(''); } if($blatOutput eq ''){ $self->{blatOutput} = 'blat.psl'; } else{ $self->{blatOutput} = $blatOutput; } if($statOutput eq ''){ my($filename, $dirs, $suffix) = fileparse($ref, qr/\.[^.]*/); $statOutput = $filename . '.psl'; } else{ $self->{statOutput} = $statOutput; } } sub help { my $prog = basename($0) ; print STDERR <<EOF ; #### $prog #### # # AUTHOR: Sebastien THEIL # LAST MODIF: 16/07/2015 # PURPOSE: This script is basically used to map reads (using blat) on contigs (or scaffolds) and count the number of read per contig. Input can be multiple fasta or fastq. Output is a 2 column tabular file. USAGE: $prog -i singl.fastq -i singl.fasta -1 R1.fastq -2 R2.fastq .... ### OPTIONS ### -r <string> Rererence sequences file. -id <int> Identity threshold. (Default: $identity) -cov <int> Coverage threshold. (Default: $coverage) -b <string> Blat output file path -o <string> Output file path -s <string> Statistics to print : nb_reads, rpkm or mean_coverage. -v <int> Verbosity level. (0 -> 4). EOF exit(1); }