Mercurial > repos > romaingred > srnapipe
comparison bin/subgroups.pm @ 4:bf3c8bf8b819 draft
Deleted selected files
| author | romaingred |
|---|---|
| date | Wed, 13 Dec 2017 10:24:17 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:e9c9cee888cf | 4:bf3c8bf8b819 |
|---|---|
| 1 package subgroups; | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 use Exporter; | |
| 6 our @ISA = qw( Exporter ); | |
| 7 our @EXPORT_OK = qw( &subgroups ); | |
| 8 | |
| 9 use POSIX; | |
| 10 use FindBin; | |
| 11 use lib $FindBin::Bin; | |
| 12 use align qw ( get_hash_alignment ); | |
| 13 | |
| 14 sub subgroups | |
| 15 { | |
| 16 my ($fin, $dir, $mis, $mis_TE, $proc, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $min_si, $max_si, $min_pi, $max_pi, $report ) = @_; | |
| 17 my (@files, $sum, $pie, $repar, %ismapped, %isjunk, %repartition, @junk_ref, @all_ref ); | |
| 18 | |
| 19 srand(); | |
| 20 print $report "----------------------------\n"; | |
| 21 print $report "Create subgroups:\nfastq_in: $fin\ndirectory_out: $dir\nmismatches: $mis\nmismatches TE: $mis_TE\n"; | |
| 22 | |
| 23 mkdir $dir; | |
| 24 $dir = $dir.'/' unless $dir =~ /(.*)\/$/; | |
| 25 | |
| 26 my $accept_miRNas = $dir.'miRNAs.fastq'; | |
| 27 my $reject_miRNAs = $dir.'miRNAs_rejected.fastq'; | |
| 28 my $sam_miRNAs = $dir.'miRNAs.sam'; | |
| 29 my @tmp = get_hash_alignment($miRNAs, $mis, 1, 1, $accept_miRNas, $reject_miRNAs, $fin, $proc, 'miRNAs',$sam_miRNAs, $report); | |
| 30 my $mi = $tmp[0]; | |
| 31 $repartition{'miRNAs'} = $mi; | |
| 32 | |
| 33 my $sam = new String::Random; | |
| 34 $sam = $sam->randpattern("CCcccccc"); | |
| 35 my $reject_rRNAs = $dir.'rRNAs_rejected.fastq'; | |
| 36 @tmp = get_hash_alignment($rRNAs, $mis, 0, 1, 'NA', $reject_rRNAs, $reject_miRNAs, $proc, 'rRNAs', $sam, $report); | |
| 37 $repartition{'rRNAs'} = $tmp[0]; | |
| 38 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
| 39 | |
| 40 $sam = new String::Random; | |
| 41 $sam = $sam->randpattern("CCcccccc"); | |
| 42 my $reject_tRNAs = $dir.'tRNAs_rejected.fastq'; | |
| 43 @tmp = get_hash_alignment($tRNAs, $mis, 0, 1, 'NA', $reject_tRNAs, $reject_rRNAs, $proc, 'tRNAs', $sam, $report); | |
| 44 $repartition{'tRNAs'} = $tmp[0]; | |
| 45 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
| 46 | |
| 47 $sam = new String::Random; | |
| 48 $sam = $sam->randpattern("CCcccccc"); | |
| 49 my $bonafide = $dir.'bonafide_reads.fastq'; | |
| 50 @tmp = get_hash_alignment($snRNAs, $mis, 0, 1, 'NA', $bonafide, $reject_tRNAs, $proc, 'snRNAs', $sam, $report); | |
| 51 $repartition{'snRNAs'} = $tmp[0]; | |
| 52 my $bo = $tmp[1]; | |
| 53 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
| 54 | |
| 55 my $sam_transcripts = $dir.'transcripts.sam'; | |
| 56 my $reject_transcripts = $dir.'rejected_transcripts.fastq'; | |
| 57 @tmp = get_hash_alignment($transcripts, $mis, 0, 1, 'NA', $reject_transcripts, $bonafide, $proc, 'transcripts', $sam_transcripts, $report, $dir.'transcripts.fai'); | |
| 58 $repartition{'transcripts'} = $tmp[0]; | |
| 59 | |
| 60 | |
| 61 my $sam_TEs = $dir.'TEs.sam'; | |
| 62 my $reject_TEs = $dir.'rejected.fastq'; | |
| 63 @tmp = get_hash_alignment($TE, $mis_TE, 0, 1, 'NA', $reject_TEs, $reject_transcripts, $proc, 'TEs', $sam_TEs, $report, $dir.'TEs.fai' ); | |
| 64 $repartition{'TEs'} = $tmp[0] ; $repartition{'others'} = $tmp[1]; | |
| 65 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
| 66 unlink $reject_transcripts; | |
| 67 unlink $reject_rRNAs; | |
| 68 unlink $reject_miRNAs; | |
| 69 unlink $reject_tRNAs; | |
| 70 | |
| 71 #create repartition | |
| 72 my $pi = fastqSubgroups($bonafide, $dir, $min_si, $max_si, $min_pi, $max_pi ); | |
| 73 | |
| 74 open (my $re, '>'.$dir.'repartition.txt') || die "cannot open $dir repartition.txt $!\n"; | |
| 75 print $re "type\tnumber\tpercentage\n"; | |
| 76 $sum += $_ foreach values %repartition; | |
| 77 foreach my $k ( sort keys %repartition ) | |
| 78 { | |
| 79 my $prct = 0; | |
| 80 $prct = $repartition{$k} / $sum * 100 if $sum != 0; | |
| 81 print $re "$k\t$repartition{$k}\t"; printf $re "%.2f\n",$prct; | |
| 82 } | |
| 83 return ( $bo, $mi, $pi); | |
| 84 } | |
| 85 | |
| 86 sub fastqSubgroups | |
| 87 { | |
| 88 my ( $fastq, $output_directory, $min_si, $max_si, $min_pi, $max_pi ) = @_; | |
| 89 my $fastq_siRNA = $output_directory."siRNAs.fastq"; | |
| 90 my $fastq_piRNA = $output_directory."piRNAs.fastq"; | |
| 91 | |
| 92 open my $fic, '<', $fastq || die "cannot open input file $! \n"; | |
| 93 open my $si, '>', $fastq_siRNA || die "cannot open siRNA.fastq $! \n"; | |
| 94 open my $pi, '>', $fastq_piRNA || die "cannot open piRNA.fastq $! \n"; | |
| 95 | |
| 96 my ($length, $cmp, $type, $siRNA_number, $miRNA_h_number, $piRNA_number, $not_pi_number) = (0,0,0,0,0,0,0); | |
| 97 my (@fastq) =(); my $seq_name; | |
| 98 my $out; | |
| 99 while(<$fic>) | |
| 100 { | |
| 101 chomp $_; | |
| 102 $cmp++; | |
| 103 if ($cmp == 1) | |
| 104 { | |
| 105 die "file do not contain a @ at line $cmp\n" unless ($_ =~ /^\@/ ); | |
| 106 $type = 0; @fastq = (); | |
| 107 if ($_ =~ /^\@(.*)\s.*/) { $seq_name = $1;} | |
| 108 elsif ($_ =~ /^\@(.*)/) {$seq_name = $1;} | |
| 109 push(@fastq,$_); | |
| 110 } | |
| 111 elsif ($cmp == 2) | |
| 112 { | |
| 113 #die "unrecognized symbol at line $cmp\n" unless $_ =~ /[atcgATCGnN]+/; | |
| 114 push(@fastq,$_); | |
| 115 $length = length($_); | |
| 116 $type = 0; | |
| 117 if ( $length >= $min_si && $length <= $max_si ) | |
| 118 { | |
| 119 $type = 2; | |
| 120 $siRNA_number++; | |
| 121 } | |
| 122 if ($length >= $min_pi && $length <= $max_pi ) | |
| 123 { | |
| 124 $type += 4; | |
| 125 $piRNA_number++; | |
| 126 } | |
| 127 } | |
| 128 elsif ($cmp == 3 ) | |
| 129 { | |
| 130 die "file do not contain a + at line $cmp\n" unless $_ =~ /^\+/; | |
| 131 push(@fastq,$_); | |
| 132 } | |
| 133 elsif ($cmp == 4 ) | |
| 134 { | |
| 135 push(@fastq,$_); | |
| 136 $cmp = 0; | |
| 137 if ($type != 0) | |
| 138 { | |
| 139 if ($type & 4 ) { foreach my $t (@fastq) { print $pi $t."\n";} } | |
| 140 if ($type & 2 ) { foreach my $t (@fastq) { print $si $t."\n";} } | |
| 141 } | |
| 142 } | |
| 143 } | |
| 144 close $fic; | |
| 145 close $si; close $pi; | |
| 146 return ($piRNA_number); | |
| 147 } | |
| 148 | |
| 149 1; |
