Mercurial > repos > romaingred > pirna_pipeline
comparison bin/piPipe.pl @ 35:67edc34cf0ef draft
Uploaded
author | romaingred |
---|---|
date | Tue, 28 Nov 2017 09:15:29 -0500 |
parents | 46d7549c1a05 |
children | af4ee65eaffb |
comparison
equal
deleted
inserted
replaced
34:02c46c8164a8 | 35:67edc34cf0ef |
---|---|
15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); | 15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); |
16 use html qw ( main_page details_pages menu_page ppp_page ); | 16 use html qw ( main_page details_pages menu_page ppp_page ); |
17 use File::Copy; | 17 use File::Copy; |
18 | 18 |
19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); | 19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); |
20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $exons, $TE ); | 20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE ); |
21 my ( $si_min, $si_max, $pi_min, $pi_max ); | 21 my ( $si_min, $si_max, $pi_min, $pi_max ); |
22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_exons, $build_TE ); | 22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ); |
23 my $max_procs = 8; | 23 my $max_procs = 8; |
24 | 24 |
25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_exons, $build_TE ) = (0,0,0,0,0,0,0); | 25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0); |
26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); | 26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); |
27 $Pcheck ='true'; | 27 $Pcheck ='true'; |
28 | 28 |
29 GetOptions ( | 29 GetOptions ( |
30 "fastq=s" => \@fastq, | 30 "fastq=s" => \@fastq, |
44 "ref:s" => \$ref, | 44 "ref:s" => \$ref, |
45 "tRNAs:s" => \$tRNAs, | 45 "tRNAs:s" => \$tRNAs, |
46 "rRNAs:s" => \$rRNAs, | 46 "rRNAs:s" => \$rRNAs, |
47 "snRNAs:s" => \$snRNAs, | 47 "snRNAs:s" => \$snRNAs, |
48 "miRNAs:s" => \$miRNAs, | 48 "miRNAs:s" => \$miRNAs, |
49 "exons:s" => \$exons, | 49 "transcripts:s" => \$transcripts, |
50 "TE:s" => \$TE, | 50 "TE:s" => \$TE, |
51 "build_index" => \$build_index, | 51 "build_index" => \$build_index, |
52 "build_tRNAs" => \$build_tRNAs, | 52 "build_tRNAs" => \$build_tRNAs, |
53 "build_snRNAs" => \$build_snRNAs, | 53 "build_snRNAs" => \$build_snRNAs, |
54 "build_miRNAs" => \$build_miRNAs, | 54 "build_miRNAs" => \$build_miRNAs, |
55 "build_exons" => \$build_exons, | 55 "build_transcripts" => \$build_transcripts, |
56 "build_rRNAs" => \$build_rRNAs, | 56 "build_rRNAs" => \$build_rRNAs, |
57 "build_TE" => \$build_TE | 57 "build_TE" => \$build_TE |
58 ); | 58 ); |
59 | 59 |
60 my $fq_collection = 'fastq_dir/'; | 60 my $fq_collection = 'fastq_dir/'; |
65 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); | 65 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); |
66 | 66 |
67 my $file = $dir.'report.txt'; | 67 my $file = $dir.'report.txt'; |
68 open my $report, '>', $file or die "Cannot open $file $!\n"; | 68 open my $report, '>', $file or die "Cannot open $file $!\n"; |
69 | 69 |
70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_exons, \$exons], [$build_TE, \$TE] ); | 70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] ); |
71 to_build ( \@toBuild, $report, $dir ); | 71 to_build ( \@toBuild, $report, $dir ); |
72 | 72 |
73 my $proc_child = ceil($max_procs / scalar(@fastq)); | 73 my $proc_child = ceil($max_procs / scalar(@fastq)); |
74 my $proc_grand_child = ceil($proc_child/4); | 74 my $proc_grand_child = ceil($proc_child/4); |
75 my $pm = Parallel::ForkManager->new($max_procs); | 75 my $pm = Parallel::ForkManager->new($max_procs); |
101 { | 101 { |
102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); | 102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); |
103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); | 103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); |
104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); | 104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); |
105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); | 105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); |
106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $exons,@suffix ); | 106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix ); |
107 | 107 |
108 $pm->start($fastq[$child]) and next; | 108 $pm->start($fastq[$child]) and next; |
109 | 109 |
110 my $dir_fq = $dir.$name.'/'; | 110 my $dir_fq = $dir.$name.'/'; |
111 mkdir $dir_fq; | 111 mkdir $dir_fq; |
148 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); | 148 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); |
149 | 149 |
150 my $group_dir = $dir_fq.'subgroups/'; | 150 my $group_dir = $dir_fq.'subgroups/'; |
151 my $fastq_uni = $gen_dir.'unique.fastq'; | 151 my $fastq_uni = $gen_dir.'unique.fastq'; |
152 my $fastq_all = $gen_dir.'all.fastq'; | 152 my $fastq_all = $gen_dir.'all.fastq'; |
153 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $exons, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); | 153 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); |
154 | 154 |
155 pie_chart($group_dir); | 155 pie_chart($group_dir); |
156 | 156 |
157 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; | 157 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; |
158 my %dupnum_genome; | 158 my %dupnum_genome; |
170 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; | 170 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; |
171 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); | 171 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); |
172 | 172 |
173 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); | 173 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); |
174 | 174 |
175 my ( $sam_exons, $sam_TEs ) = ( $group_dir.'exons.sam', $group_dir.'TEs.sam' ); | 175 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' ); |
176 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); | 176 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); |
177 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); | 177 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); |
178 foreach my $grand_child ( 0 .. $#types ) | 178 foreach my $grand_child ( 0 .. $#types ) |
179 { | 179 { |
180 my $type_dir = $group_dir.$types_names[$grand_child].'/'; | 180 my $type_dir = $group_dir.$types_names[$grand_child].'/'; |
181 my $type_prefix = $types_names[$grand_child].'-'; | 181 my $type_prefix = $types_names[$grand_child].'-'; |
182 mkdir $type_dir; | 182 mkdir $type_dir; |
183 $pm2->start($types[$grand_child]) and next; | 183 $pm2->start($types[$grand_child]) and next; |
184 my ( $type_sam_genome, $type_sam_TEs, $type_sam_exons ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'exons.sam' ); | 184 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' ); |
185 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_exons ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'exons_unique.sam' ); | 185 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' ); |
186 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_exons_fastq ) = ( $fq_collection.$type_prefix.'genome_uni.fastq', $fq_collection.$type_prefix.'TEs_uni.fastq', $fq_collection.$type_prefix.'exons_uni.fastq'); | 186 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$type_prefix.'genome_uni.fastq', $fq_collection.$type_prefix.'TEs_uni.fastq', $fq_collection.$type_prefix.'transcripts_uni.fastq'); |
187 my ( $type_genome_fastq, $type_TEs_fastq, $type_exons_fastq ) = ( $fq_collection.$type_prefix.'genome.fastq', $fq_collection.$type_prefix.'TEs.fastq', $fq_collection.$type_prefix.'exons.fastq'); | 187 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$type_prefix.'genome.fastq', $fq_collection.$type_prefix.'TEs.fastq', $fq_collection.$type_prefix.'transcripts.fastq'); |
188 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); | 188 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); |
189 | 189 |
190 if ( $grand_child == 1 ) | 190 if ( $grand_child == 1 ) |
191 { | 191 { |
192 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); | 192 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); |
193 BWA_call ( $exons, $types[$grand_child], $type_sam_exons, $mis, $proc_child, $report ); | 193 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report ); |
194 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); | 194 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); |
195 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); | 195 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); |
196 extract_sam ( undef, $type_sam_exons, $type_sam_exons, $type_sam_uni_exons, $type_exons_fastq, $type_uni_exons_fastq ); | 196 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); |
197 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); | 197 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); |
198 } | 198 } |
199 else | 199 else |
200 { | 200 { |
201 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); | 201 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); |
202 extract_sam ( $type_sequence_hashP, $sam_exons, $type_sam_exons, $type_sam_uni_exons, $type_exons_fastq, $type_uni_exons_fastq ); | 202 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq ); |
203 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); | 203 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); |
204 } | 204 } |
205 | 205 |
206 my $ex_count_file = $type_dir.'exons_reads_counts.txt'; | 206 my $ex_count_file = $type_dir.'transcripts_reads_counts.txt'; |
207 my ( $ex_count, $ex_ref_size ) = sam_count ( $type_sam_exons ); | 207 my ( $ex_count, $ex_ref_size ) = sam_count ( $type_sam_transcripts ); |
208 rpms_rpkm( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); | 208 rpms_rpkm( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); |
209 | 209 |
210 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); | 210 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); |
211 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; | 211 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; |
212 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; | 212 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; |
214 rpms_rpkm( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); | 214 rpms_rpkm( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); |
215 rpms_rpkm( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); | 215 rpms_rpkm( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); |
216 rpms_rpkm( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); | 216 rpms_rpkm( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); |
217 | 217 |
218 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); | 218 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); |
219 sam_sorted_bam ( $type_sam_exons, $grand_child ); sam_sorted_bam ( $type_sam_uni_exons, $grand_child ); | 219 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child ); |
220 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); | 220 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); |
221 | 221 |
222 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; | 222 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; |
223 mkdir $Gviz_TEs; | 223 mkdir $Gviz_TEs; |
224 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); | 224 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); |