comparison bin/piPipe.pl @ 35:67edc34cf0ef draft

Uploaded
author romaingred
date Tue, 28 Nov 2017 09:15:29 -0500
parents 46d7549c1a05
children af4ee65eaffb
comparison
equal deleted inserted replaced
34:02c46c8164a8 35:67edc34cf0ef
15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm BWA_call get_fastq_seq extract_sam sam_to_bam_bg ); 15 use align qw ( to_build get_unique sam_count sam_count_mis sam_sorted_bam rpms_rpkm BWA_call get_fastq_seq extract_sam sam_to_bam_bg );
16 use html qw ( main_page details_pages menu_page ppp_page ); 16 use html qw ( main_page details_pages menu_page ppp_page );
17 use File::Copy; 17 use File::Copy;
18 18
19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out); 19 my ( @fastq, @fastq_n, $dir, $min, $max, $mis, $misTE, $help, $Pcheck, $mapnumf, $html_out);
20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $exons, $TE ); 20 my ( $ref, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE );
21 my ( $si_min, $si_max, $pi_min, $pi_max ); 21 my ( $si_min, $si_max, $pi_min, $pi_max );
22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_exons, $build_TE ); 22 my ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE );
23 my $max_procs = 8; 23 my $max_procs = 8;
24 24
25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_exons, $build_TE ) = (0,0,0,0,0,0,0); 25 ( $build_index, $build_tRNAs, $build_rRNAs, $build_snRNAs, $build_miRNAs, $build_transcripts, $build_TE ) = (0,0,0,0,0,0,0);
26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 ); 26 ( $min, $max, $mis, $misTE, $si_min, $si_max, $pi_min, $pi_max, $dir ) = ( 18, 29, 0, 3, 21, 21, 23, 29 );
27 $Pcheck ='true'; 27 $Pcheck ='true';
28 28
29 GetOptions ( 29 GetOptions (
30 "fastq=s" => \@fastq, 30 "fastq=s" => \@fastq,
44 "ref:s" => \$ref, 44 "ref:s" => \$ref,
45 "tRNAs:s" => \$tRNAs, 45 "tRNAs:s" => \$tRNAs,
46 "rRNAs:s" => \$rRNAs, 46 "rRNAs:s" => \$rRNAs,
47 "snRNAs:s" => \$snRNAs, 47 "snRNAs:s" => \$snRNAs,
48 "miRNAs:s" => \$miRNAs, 48 "miRNAs:s" => \$miRNAs,
49 "exons:s" => \$exons, 49 "transcripts:s" => \$transcripts,
50 "TE:s" => \$TE, 50 "TE:s" => \$TE,
51 "build_index" => \$build_index, 51 "build_index" => \$build_index,
52 "build_tRNAs" => \$build_tRNAs, 52 "build_tRNAs" => \$build_tRNAs,
53 "build_snRNAs" => \$build_snRNAs, 53 "build_snRNAs" => \$build_snRNAs,
54 "build_miRNAs" => \$build_miRNAs, 54 "build_miRNAs" => \$build_miRNAs,
55 "build_exons" => \$build_exons, 55 "build_transcripts" => \$build_transcripts,
56 "build_rRNAs" => \$build_rRNAs, 56 "build_rRNAs" => \$build_rRNAs,
57 "build_TE" => \$build_TE 57 "build_TE" => \$build_TE
58 ); 58 );
59 59
60 my $fq_collection = 'fastq_dir/'; 60 my $fq_collection = 'fastq_dir/';
65 dircopy( $FindBin::Bin.'/js', $dir.'/js' ); 65 dircopy( $FindBin::Bin.'/js', $dir.'/js' );
66 66
67 my $file = $dir.'report.txt'; 67 my $file = $dir.'report.txt';
68 open my $report, '>', $file or die "Cannot open $file $!\n"; 68 open my $report, '>', $file or die "Cannot open $file $!\n";
69 69
70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_exons, \$exons], [$build_TE, \$TE] ); 70 my @toBuild = ( [$build_index, \$ref], [$build_tRNAs, \$tRNAs], [$build_rRNAs, \$rRNAs], [$build_snRNAs, \$snRNAs], [$build_miRNAs, \$miRNAs], [$build_transcripts, \$transcripts], [$build_TE, \$TE] );
71 to_build ( \@toBuild, $report, $dir ); 71 to_build ( \@toBuild, $report, $dir );
72 72
73 my $proc_child = ceil($max_procs / scalar(@fastq)); 73 my $proc_child = ceil($max_procs / scalar(@fastq));
74 my $proc_grand_child = ceil($proc_child/4); 74 my $proc_grand_child = ceil($proc_child/4);
75 my $pm = Parallel::ForkManager->new($max_procs); 75 my $pm = Parallel::ForkManager->new($max_procs);
101 { 101 {
102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt'); 102 my @suffix = ('.fastq', '.fastq.gz,', '.fq', '.fq.gz', 'ref', '.dat', '.fa','.fas','.fasta', '.txt');
103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix ); 103 my ( $name, $path, $suffix ) = fileparse( $fastq[$child], @suffix );
104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix ); 104 my ( $ref_name, $ref_path, $ref_suffix ) = fileparse( $ref, @suffix );
105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix ); 105 my ( $TE_name, $TE_path, $TE_suffix ) = fileparse( $TE, @suffix );
106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $exons,@suffix ); 106 my ( $ex_name, $ex_path, $ex_suffix ) = fileparse( $transcripts, @suffix );
107 107
108 $pm->start($fastq[$child]) and next; 108 $pm->start($fastq[$child]) and next;
109 109
110 my $dir_fq = $dir.$name.'/'; 110 my $dir_fq = $dir.$name.'/';
111 mkdir $dir_fq; 111 mkdir $dir_fq;
148 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' ); 148 bg_to_png ( $fai_file, $fastq_prefix.'_plus.bedgraph', $fastq_prefix.'_minus.bedgraph', $Gviz_dir_rand, 'Mb' );
149 149
150 my $group_dir = $dir_fq.'subgroups/'; 150 my $group_dir = $dir_fq.'subgroups/';
151 my $fastq_uni = $gen_dir.'unique.fastq'; 151 my $fastq_uni = $gen_dir.'unique.fastq';
152 my $fastq_all = $gen_dir.'all.fastq'; 152 my $fastq_all = $gen_dir.'all.fastq';
153 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $exons, $TE, $si_min, $si_max, $pi_min, $pi_max, $report); 153 my ($bo, $mi, $pi) = subgroups ( $fastq_all, $group_dir, $mis, $misTE, $proc_child, $tRNAs, $rRNAs, $snRNAs, $miRNAs, $transcripts, $TE, $si_min, $si_max, $pi_min, $pi_max, $report);
154 154
155 pie_chart($group_dir); 155 pie_chart($group_dir);
156 156
157 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!"; 157 open (my $dupnum, $gen_dir.'dup_mapnum.txt') || die "cannot open dup_mapnum.txt $!";
158 my %dupnum_genome; 158 my %dupnum_genome;
170 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt'; 170 my $mi_count_file = $group_dir.'miRNAs/miRNAs_reads_counts.txt';
171 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam ); 171 my ( $mi_count, $mi_ref_size ) = sam_count ( $mi_sam );
172 172
173 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo ); 173 rpms_rpkm( $mi_count, $mi_ref_size, $ma, $mi_count_file, $pi, $mi, $bo );
174 174
175 my ( $sam_exons, $sam_TEs ) = ( $group_dir.'exons.sam', $group_dir.'TEs.sam' ); 175 my ( $sam_transcripts, $sam_TEs ) = ( $group_dir.'transcripts.sam', $group_dir.'TEs.sam' );
176 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' ); 176 my @types = ($group_dir.'bonafide_reads.fastq', $group_dir.'miRNAs.fastq', $group_dir.'siRNAs.fastq', $group_dir.'piRNAs.fastq' );
177 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs'); 177 my @types_names = ('bonafide_reads', 'miRNAs', 'siRNAs', 'piRNAs');
178 foreach my $grand_child ( 0 .. $#types ) 178 foreach my $grand_child ( 0 .. $#types )
179 { 179 {
180 my $type_dir = $group_dir.$types_names[$grand_child].'/'; 180 my $type_dir = $group_dir.$types_names[$grand_child].'/';
181 my $type_prefix = $types_names[$grand_child].'-'; 181 my $type_prefix = $types_names[$grand_child].'-';
182 mkdir $type_dir; 182 mkdir $type_dir;
183 $pm2->start($types[$grand_child]) and next; 183 $pm2->start($types[$grand_child]) and next;
184 my ( $type_sam_genome, $type_sam_TEs, $type_sam_exons ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'exons.sam' ); 184 my ( $type_sam_genome, $type_sam_TEs, $type_sam_transcripts ) = ( $type_dir.$type_prefix.'genome.sam', $type_dir.$type_prefix.'TEs.sam', $type_dir.$type_prefix.'transcripts.sam' );
185 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_exons ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'exons_unique.sam' ); 185 my ( $type_sam_uni_genome, $type_sam_uni_TEs, $type_sam_uni_transcripts ) = ( $type_dir.$type_prefix.'genome_unique.sam', $type_dir.$type_prefix.'TEs_unique.sam', $type_dir.$type_prefix.'transcripts_unique.sam' );
186 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_exons_fastq ) = ( $fq_collection.$type_prefix.'genome_uni.fastq', $fq_collection.$type_prefix.'TEs_uni.fastq', $fq_collection.$type_prefix.'exons_uni.fastq'); 186 my ( $type_uni_genome_fastq, $type_uni_TEs_fastq, $type_uni_transcripts_fastq ) = ( $fq_collection.$type_prefix.'genome_uni.fastq', $fq_collection.$type_prefix.'TEs_uni.fastq', $fq_collection.$type_prefix.'transcripts_uni.fastq');
187 my ( $type_genome_fastq, $type_TEs_fastq, $type_exons_fastq ) = ( $fq_collection.$type_prefix.'genome.fastq', $fq_collection.$type_prefix.'TEs.fastq', $fq_collection.$type_prefix.'exons.fastq'); 187 my ( $type_genome_fastq, $type_TEs_fastq, $type_transcripts_fastq ) = ( $fq_collection.$type_prefix.'genome.fastq', $fq_collection.$type_prefix.'TEs.fastq', $fq_collection.$type_prefix.'transcripts.fastq');
188 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] ); 188 my $type_sequence_hashP = get_fastq_seq ( $types[$grand_child] );
189 189
190 if ( $grand_child == 1 ) 190 if ( $grand_child == 1 )
191 { 191 {
192 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report ); 192 BWA_call ( $TE, $types[$grand_child], $type_sam_TEs, $misTE, $proc_child, $report );
193 BWA_call ( $exons, $types[$grand_child], $type_sam_exons, $mis, $proc_child, $report ); 193 BWA_call ( $transcripts, $types[$grand_child], $type_sam_transcripts, $mis, $proc_child, $report );
194 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report ); 194 BWA_call ( $ref, $types[$grand_child], $type_sam_genome, $mis, $proc_child, $report );
195 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq ); 195 extract_sam ( undef, $type_sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_uni_TEs_fastq, $type_uni_TEs_fastq );
196 extract_sam ( undef, $type_sam_exons, $type_sam_exons, $type_sam_uni_exons, $type_exons_fastq, $type_uni_exons_fastq ); 196 extract_sam ( undef, $type_sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
197 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); 197 extract_sam ( undef, $type_sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
198 } 198 }
199 else 199 else
200 { 200 {
201 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq ); 201 extract_sam ( $type_sequence_hashP, $sam_TEs, $type_sam_TEs, $type_sam_uni_TEs, $type_TEs_fastq, $type_uni_TEs_fastq );
202 extract_sam ( $type_sequence_hashP, $sam_exons, $type_sam_exons, $type_sam_uni_exons, $type_exons_fastq, $type_uni_exons_fastq ); 202 extract_sam ( $type_sequence_hashP, $sam_transcripts, $type_sam_transcripts, $type_sam_uni_transcripts, $type_transcripts_fastq, $type_uni_transcripts_fastq );
203 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq ); 203 extract_sam ( $type_sequence_hashP, $sam_genome, $type_sam_genome, $type_sam_uni_genome, $type_genome_fastq, $type_uni_genome_fastq );
204 } 204 }
205 205
206 my $ex_count_file = $type_dir.'exons_reads_counts.txt'; 206 my $ex_count_file = $type_dir.'transcripts_reads_counts.txt';
207 my ( $ex_count, $ex_ref_size ) = sam_count ( $type_sam_exons ); 207 my ( $ex_count, $ex_ref_size ) = sam_count ( $type_sam_transcripts );
208 rpms_rpkm( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo ); 208 rpms_rpkm( $ex_count, $ex_ref_size, $ma, $ex_count_file, $pi, $mi, $bo );
209 209
210 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs ); 210 my ( $TEs_count, $TEs_ref_size, $TEs_count_NoM, $TEs_count_M ) = sam_count_mis ( $type_sam_TEs );
211 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt'; 211 my $TEs_count_file = $type_dir.$type_prefix.'TEs_reads_counts.txt';
212 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt'; 212 my $TEs_count_file_M = $type_dir.$type_prefix.'TEs_reads_counts_mismatches.txt';
214 rpms_rpkm( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo ); 214 rpms_rpkm( $TEs_count, $TEs_ref_size, $ma, $TEs_count_file, $pi, $mi, $bo );
215 rpms_rpkm( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo ); 215 rpms_rpkm( $TEs_count_NoM, $TEs_ref_size, $ma, $TEs_count_file_noM, $pi, $mi, $bo );
216 rpms_rpkm( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo ); 216 rpms_rpkm( $TEs_count_M, $TEs_ref_size, $ma, $TEs_count_file_M, $pi, $mi, $bo );
217 217
218 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child ); 218 sam_to_bam_bg ( $type_sam_TEs, $scale, $grand_child );
219 sam_sorted_bam ( $type_sam_exons, $grand_child ); sam_sorted_bam ( $type_sam_uni_exons, $grand_child ); 219 sam_sorted_bam ( $type_sam_transcripts, $grand_child ); sam_sorted_bam ( $type_sam_uni_transcripts, $grand_child );
220 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child ); 220 sam_sorted_bam ( $type_sam_uni_TEs, $grand_child );
221 221
222 my $Gviz_TEs = $type_dir.'Gviz_TEs/'; 222 my $Gviz_TEs = $type_dir.'Gviz_TEs/';
223 mkdir $Gviz_TEs; 223 mkdir $Gviz_TEs;
224 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' ); 224 bg_to_png ( $group_dir.'TEs.fai', $type_dir.$type_prefix.'TEs_plus.bedgraph', $type_dir.$type_prefix.'TEs_minus.bedgraph', $Gviz_TEs, 'Kb' );