Mercurial > repos > romaingred > pirna_pipeline
comparison bin/subgroups.pm @ 0:198009598544 draft
Uploaded
author | romaingred |
---|---|
date | Wed, 11 Oct 2017 09:57:58 -0400 |
parents | |
children | 509d95c26dab |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:198009598544 |
---|---|
1 package subgroups; | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use Exporter; | |
6 our @ISA = qw( Exporter ); | |
7 our @EXPORT_OK = qw( &subgroups ); | |
8 | |
9 use POSIX; | |
10 use FindBin; | |
11 use lib $FindBin::Bin; | |
12 use align qw ( get_hash_alignment ); | |
13 | |
14 sub subgroups | |
15 { | |
16 my ($fin, $dir, $mis, $mis_TE, $proc, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $exons, $TE, $min_si, $max_si, $min_pi, $max_pi, $report ) = @_; | |
17 my (@files, $sum, $pie, $repar, %ismapped, %isjunk, %repartition, @junk_ref, @all_ref ); | |
18 | |
19 print $report "----------------------------\n"; | |
20 print $report "Create subgroups:\nfastq_in: $fin\ndirectory_out: $dir\nmismatches: $mis\nmismatches TE: $mis_TE\n"; | |
21 | |
22 mkdir $dir; | |
23 $dir = $dir.'/' unless $dir =~ /(.*)\/$/; | |
24 | |
25 my $accept_miRNas = $dir.'miRNAs.fastq'; | |
26 my $reject_miRNAs = $dir.'miRNAs_rejected.fastq'; | |
27 my $sam_miRNAs = $dir.'miRNAs.sam'; | |
28 my @tmp = get_hash_alignment($miRNAs, $mis, 1, 1, $accept_miRNas, $reject_miRNAs, $fin, $proc, 'miRNAs',$sam_miRNAs, $report); | |
29 my $mi = $tmp[0]; | |
30 $repartition{'miRNAs'} = $mi; | |
31 | |
32 my $sam = new String::Random; | |
33 $sam = $sam->randpattern("CCcccccc"); | |
34 my $reject_rRNAs = $dir.'rRNAs_rejected.fastq'; | |
35 @tmp = get_hash_alignment($rRNAs, $mis, 0, 1, 'NA', $reject_rRNAs, $reject_miRNAs, $proc, 'rRNAs', $sam, $report); | |
36 $repartition{'rRNAs'} = $tmp[0]; | |
37 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
38 | |
39 $sam = new String::Random; | |
40 $sam = $sam->randpattern("CCcccccc"); | |
41 my $reject_tRNAs = $dir.'tRNAs_rejected.fastq'; | |
42 @tmp = get_hash_alignment($tRNAs, $mis, 0, 1, 'NA', $reject_tRNAs, $reject_rRNAs, $proc, 'tRNAs', $sam, $report); | |
43 $repartition{'tRNAs'} = $tmp[0]; | |
44 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
45 | |
46 $sam = new String::Random; | |
47 $sam = $sam->randpattern("CCcccccc"); | |
48 my $bonafide = $dir.'bonafide_reads.fastq'; | |
49 @tmp = get_hash_alignment($snRNAs, $mis, 0, 1, 'NA', $bonafide, $reject_tRNAs, $proc, 'snRNAs', $sam, $report); | |
50 $repartition{'snRNAs'} = $tmp[0]; | |
51 my $bo = $tmp[1]; | |
52 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
53 | |
54 my $sam_exons = $dir.'exons.sam'; | |
55 my $reject_exons = $dir.'rejected_exons.fastq'; | |
56 @tmp = get_hash_alignment($exons, $mis, 0, 1, 'NA', $reject_exons, $bonafide, $proc, 'exons', $sam_exons, $report, $dir.'exons.fai'); | |
57 $repartition{'exons'} = $tmp[0]; | |
58 | |
59 | |
60 my $sam_TEs = $dir.'TEs.sam'; | |
61 my $reject_TEs = $dir.'rejected.fastq'; | |
62 @tmp = get_hash_alignment($TE, $mis_TE, 0, 1, 'NA', $reject_TEs, $reject_exons, $proc, 'TEs', $sam_TEs, $report, $dir.'TEs.fai' ); | |
63 $repartition{'TEs'} = $tmp[0] ; $repartition{'others'} = $tmp[1]; | |
64 unlink $sam, $sam.'_aln.err', $sam.'_samse.err'; | |
65 unlink $reject_exons; | |
66 unlink $reject_rRNAs; | |
67 unlink $reject_miRNAs; | |
68 unlink $reject_tRNAs; | |
69 | |
70 #create repartition | |
71 my $pi = fastqSubgroups($bonafide, $dir, $min_si, $max_si, $min_pi, $max_pi ); | |
72 | |
73 open (my $re, '>'.$dir.'repartition.txt') || die "cannot open $dir repartition.txt $!\n"; | |
74 print $re "type\tnumber\tpercentage\n"; | |
75 $sum += $_ foreach values %repartition; | |
76 foreach my $k ( sort keys %repartition ) | |
77 { | |
78 my $prct = 0; | |
79 $prct = $repartition{$k} / $sum * 100 if $sum != 0; | |
80 print $re "$k\t$repartition{$k}\t"; printf $re "%.2f\n",$prct; | |
81 } | |
82 return ( $bo, $mi, $pi); | |
83 } | |
84 | |
85 sub fastqSubgroups | |
86 { | |
87 my ( $fastq, $output_directory, $min_si, $max_si, $min_pi, $max_pi ) = @_; | |
88 my $fastq_siRNA = $output_directory."siRNAs.fastq"; | |
89 my $fastq_piRNA = $output_directory."piRNAs.fastq"; | |
90 | |
91 open my $fic, '<', $fastq || die "cannot open input file $! \n"; | |
92 open my $si, '>', $fastq_siRNA || die "cannot open siRNA.fastq $! \n"; | |
93 open my $pi, '>', $fastq_piRNA || die "cannot open piRNA.fastq $! \n"; | |
94 | |
95 my ($length, $cmp, $type, $siRNA_number, $miRNA_h_number, $piRNA_number, $not_pi_number) = (0,0,0,0,0,0,0); | |
96 my (@fastq) =(); my $seq_name; | |
97 my $out; | |
98 while(<$fic>) | |
99 { | |
100 chomp $_; | |
101 $cmp++; | |
102 if ($cmp == 1) | |
103 { | |
104 die "file do not contain a @ at line $cmp\n" unless ($_ =~ /^\@/ ); | |
105 $type = 0; @fastq = (); | |
106 if ($_ =~ /^\@(.*)\s.*/) { $seq_name = $1;} | |
107 elsif ($_ =~ /^\@(.*)/) {$seq_name = $1;} | |
108 push(@fastq,$_); | |
109 } | |
110 elsif ($cmp == 2) | |
111 { | |
112 #die "unrecognized symbol at line $cmp\n" unless $_ =~ /[atcgATCGnN]+/; | |
113 push(@fastq,$_); | |
114 $length = length($_); | |
115 $type = 0; | |
116 if ( $length >= $min_si && $length <= $max_si ) | |
117 { | |
118 $type = 2; | |
119 $siRNA_number++; | |
120 } | |
121 if ($length >= $min_pi && $length <= $max_pi ) | |
122 { | |
123 $type += 4; | |
124 $piRNA_number++; | |
125 } | |
126 } | |
127 elsif ($cmp == 3 ) | |
128 { | |
129 die "file do not contain a + at line $cmp\n" unless $_ =~ /^\+/; | |
130 push(@fastq,$_); | |
131 } | |
132 elsif ($cmp == 4 ) | |
133 { | |
134 push(@fastq,$_); | |
135 $cmp = 0; | |
136 if ($type != 0) | |
137 { | |
138 if ($type & 4 ) { foreach my $t (@fastq) { print $pi $t."\n";} } | |
139 if ($type & 2 ) { foreach my $t (@fastq) { print $si $t."\n";} } | |
140 } | |
141 } | |
142 } | |
143 close $fic; | |
144 close $si; close $pi; | |
145 return ($piRNA_number); | |
146 } | |
147 | |
148 1; |