Mercurial > repos > antmarge > dataoverview
comparison dataOverview.pl @ 3:80205e898861 draft default tip
New updates
author | antmarge |
---|---|
date | Tue, 02 May 2017 21:20:54 -0400 |
parents | b66f4a551e25 |
children |
comparison
equal
deleted
inserted
replaced
2:3ed885628c9f | 3:80205e898861 |
---|---|
282 print OUT "$sat%\tSaturation of TA sites after cutoff filter (validInsertions/TAsites)\n"; | 282 print OUT "$sat%\tSaturation of TA sites after cutoff filter (validInsertions/TAsites)\n"; |
283 print OUT "$inscov%\tGenome coverage by insertions (validInsertions/genomeSize)\n"; | 283 print OUT "$inscov%\tGenome coverage by insertions (validInsertions/genomeSize)\n"; |
284 print OUT "$tacov%\tGenome coverage by TA sites (TAsites/genomeSize)\n"; | 284 print OUT "$tacov%\tGenome coverage by TA sites (TAsites/genomeSize)\n"; |
285 print OUT "$lg_dist_ta\tLargest distance between TA sites\n"; | 285 print OUT "$lg_dist_ta\tLargest distance between TA sites\n"; |
286 print OUT "$lg_dist_ins\tLargest distance between insertions\n"; | 286 print OUT "$lg_dist_ins\tLargest distance between insertions\n"; |
287 print OUT "\n\nOpen Reading Frames\n\n"; | |
288 | 287 |
289 #Store everything to be printed in an array | 288 #Store everything to be printed in an array |
290 my @table; | 289 my @table; |
291 | |
292 #Find open reading frames from fasta file | |
293 local $_ = $fasta; | |
294 my @orfSize; | |
295 my @allc; #numbers of TAs in the ORFS here. | |
296 my $blank=0; #ORFS that don't have any TA sites. | |
297 my $orfCount=0; #keep track of the number of ORFs found. | |
298 my $minSize=0; | |
299 #Read somewhere that 99 is a good min but there is an annotated 86 bp gene for 19F | |
300 while ( /ATG/g ) { | |
301 my $start = pos() - 3; | |
302 if ( /T(?:AA|AG|GA)/g ) { | |
303 my $stop = pos; | |
304 my $size=$stop - $start; | |
305 if ($size>=$minSize){ | |
306 push (@orfSize,$size); | |
307 my $seq=substr ($_, $start, $stop - $start); | |
308 my @ctemp = $seq =~ /$x/g; | |
309 my $countTA = @ctemp; | |
310 if ($countTA==0){$blank++} | |
311 push (@allc,$countTA); | |
312 $orfCount++; | |
313 } | |
314 } | |
315 } | |
316 | |
317 print OUT "\nORFs based on Fasta sequence and start (ATG) and end (TAA,TAG,TGA) codons\n"; | |
318 push (@table,["Set minimum size for an ORF",$minSize]); | |
319 print OUT "$orfCount\tTotal number of ORFs found\n"; | |
320 my ($minORF, $maxORF) = minmax(@orfSize); | |
321 print OUT "$minORF\tSmallest ORF\n"; | |
322 print OUT "$maxORF\tLargest ORF\n"; | |
323 my ($mintaORF,$maxtaORF) = minmax(@allc); | |
324 print OUT "$mintaORF\tFewest # TA sites in an ORF\n"; | |
325 print OUT "$maxtaORF\tGreatest # TA sites in an ORF\n"; | |
326 print OUT "$blank\tNumber of ORFs that don't have any TA sites\n"; | |
327 | |
328 | 290 |
329 print OUT "\nGenes using the genbank annotation file\n\n"; | 291 print OUT "\nGenes using the genbank annotation file\n\n"; |
330 ###Get genbank file. Find all start and stop for genes | 292 ###Get genbank file. Find all start and stop for genes |
331 #See how many insertions fall into genes vs intergenic regions | 293 #See how many insertions fall into genes vs intergenic regions |
332 #Get array of coordinates for all insertions then remove insertion if it is | 294 #Get array of coordinates for all insertions then remove insertion if it is |