Mercurial > repos > romaingred > srnapipe
comparison bin/resize.pm @ 4:bf3c8bf8b819 draft
Deleted selected files
| author | romaingred |
|---|---|
| date | Wed, 13 Dec 2017 10:24:17 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:e9c9cee888cf | 4:bf3c8bf8b819 |
|---|---|
| 1 package resize; | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 | |
| 6 use FindBin; | |
| 7 use lib $FindBin::Bin; | |
| 8 use Rcall qw ( histogram ); | |
| 9 | |
| 10 use Exporter; | |
| 11 our @ISA = qw( Exporter ); | |
| 12 our @EXPORT_OK = qw( &size_distribution ); | |
| 13 | |
# Filter the reads of a FASTQ file by length and write summary reports.
#
# Arguments:
#   $fastq     - path of the input FASTQ file
#   $fastq_out - path of the filtered FASTQ file to create
#   $dir       - prefix prepended verbatim to the report file names, so it
#                must end with a path separator to designate a directory
#   $min, $max - length bounds handed to size()
#
# Reports:
#   ${dir}histogram.png  - path handed to histogram() together with the
#                          per-length counts and the total returned by size()
#                          (presumably the plot it draws - confirm in Rcall)
#   ${dir}reads_size.txt - length, count and percentage of that total, sorted
#                          by increasing length
#   ${dir}duplicates.txt - one row per key of %duplicates, highest count first
#
# Returns 1 on success.
# Dies if a report file cannot be opened or cannot be completely written.
sub size_distribution
{
    my ( $fastq, $fastq_out, $dir, $min, $max ) = @_;

    my ( %fragments_size, %duplicates );
    my $num = size($min, $max, $fastq, $fastq_out, \%fragments_size, \%duplicates);

    my $png = $dir.'histogram.png';
    histogram(\%fragments_size, $png, $num);

    my $size = $dir.'reads_size.txt';
    # 'or' rather than '||': '||' binds tighter than the comma, so it would
    # test the (always true) file name and the die could never fire.
    open my $size_fh, '>', $size or die "cannot open $size $!\n";
    print {$size_fh} "size\tnumber\tpercentage\n";
    foreach my $k (sort { $a <=> $b } keys %fragments_size)
    {
        # size() bumps its returned total each time it bumps a length count,
        # so $num is non-zero whenever this loop has something to iterate.
        my $pourcentage = $fragments_size{$k} / $num * 100;
        printf {$size_fh} "%s\t%s\t%.2f\n", $k, $fragments_size{$k}, $pourcentage;
    }
    # Buffered write errors only surface when the handle is closed.
    close $size_fh or die "cannot write $size $!\n";

    my $dup = $dir.'duplicates.txt';
    open my $dup_fh, '>', $dup or die "cannot open $dup $!\n";
    # NOTE(review): the first column is labelled "size" although size() keys
    # this hash by sequence string; the header is kept as-is because other
    # tools may read it.
    print {$dup_fh} "size\tnumber\n";
    # Ties on the count are broken by key so the file is reproducible
    # instead of depending on hash ordering.
    foreach my $k (sort { $duplicates{$b} <=> $duplicates{$a} or $a cmp $b } keys %duplicates)
    {
        print {$dup_fh} "$k\t$duplicates{$k}\n";
    }
    close $dup_fh or die "cannot write $dup $!\n";

    return 1;
}
| 48 | |
# Copy the reads of a FASTQ file whose sequence length lies within
# [$min, $max] to a new file, collecting statistics on the kept reads.
#
# Arguments:
#   $min, $max      - inclusive bounds on the sequence length
#   $in_file        - path of the FASTQ file to read
#   $out_file       - path of the FASTQ file to create
#   $sizeHashR      - hash ref; the entry keyed by the read length is
#                     incremented for every kept read
#   $duplicateHashR - optional hash ref; the entry keyed by the sequence
#                     string is incremented for every kept read (skipped
#                     when undef)
#
# Returns the number of kept reads.
# Dies if a file cannot be opened, if the first line of a record does not
# start with '@', if the third does not start with '+', if the input ends in
# the middle of a four-line record, or if the output cannot be written.
sub size
{
    my ($min, $max, $in_file, $out_file, $sizeHashR, $duplicateHashR) = @_;
    my ($numreads, $cmp, $line, $ok) = (0, 0, 0, 0);
    my @fastq;

    # Three-argument open: the paths are taken literally, so a name that
    # begins with '>' or ends with '|' cannot change the mode or run a command.
    open(my $in, '<', $in_file) or die "cannot open $in_file $!\n";
    open(my $out, '>', $out_file) or die "cannot create $out_file $!\n";

    # A lexical is used instead of $_ so the caller's $_ is left untouched.
    while (my $row = <$in>)
    {
        chomp $row;
        $cmp++;     # position inside the current four-line record (1..4)
        $line++;    # absolute line number, used in error messages
        if ($cmp == 1)
        {
            die "file do not contain a @ at line $line\n" unless $row =~ /^\@/;
            $ok = 0;
            @fastq = ($row);
        }
        elsif ($cmp == 2)
        {
            push @fastq, $row;
            my $size = length $row;
            if ($size >= $min && $size <= $max)
            {
                $numreads++;
                $sizeHashR->{$size} += 1;
                $duplicateHashR->{$row} += 1 if defined $duplicateHashR;
                $ok = 1;
            }
        }
        elsif ($cmp == 3)
        {
            die "file do not contain a + at line $line\n" unless $row =~ /^\+/;
            push @fastq, $row;
        }
        else
        {
            # Fourth line: the record is complete, emit it if it was kept.
            push @fastq, $row;
            $cmp = 0;
            print {$out} map { "$_\n" } @fastq if $ok;
        }
    }

    # A partial trailing record may already have been counted above without
    # ever being written out, so refuse it rather than return statistics
    # that disagree with the output file.
    die "file is truncated: incomplete record ending at line $line\n" if $cmp != 0;

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "cannot write $out_file $!\n";
    return $numreads;
}
| 100 | |
| 101 1; |
