Mercurial > repos > yufei-luo > s_mart
comparison SMART/bacteriaRegulatoryRegion_Detection/coverageGff.pl @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki |
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 ### | |
| 3 # Purpose: add or update the coverage of a gff | |
| 4 # | |
| 5 # Input: gff file | |
| 6 # | |
| 7 # Output: gff printed on standard output | |
| 8 # | |
| 9 ###------------------------------------------------------ | |
| 10 | |
| 11 #!/usr/bin/perl -w | |
| 12 | |
| 13 use vars qw($USAGE); | |
| 14 use strict; | |
| 15 | |
| 16 =head1 NAME | |
| 17 | |
| 18 coverageGff.pl - add or compute the coverage of a gff file | |
| 19 | |
| 20 =head1 SYNOPSIS | |
| 21 | |
| 22 % coverageGff.pl -i file.gff -l readLength [-h] | |
| 23 | |
| 24 =head1 DESCRIPTION | |
| 25 This script parses a gff file, computes the read coverage from the "nbElements" tag and writes the coverage in gff3 format. | |
| 26 | |
| 27 -i|--input fileName gff input file name | |
| 28 -l|--length ReadLength length of the reads in bp [default: 38] | |
| 29 -o|--output fileName gff3 output file name (currently ignored: the result is printed on standard output) | |
| 30 [-h|--help] help mode then die | |
| 31 | |
| 32 =head1 AUTHOR - Claire Toffano-Nioche - fev.11 | |
| 33 | |
| 34 =cut | |
| 35 #----------------------- | |
# Script body: parse the command line, then stream the gff file to STDOUT,
# adding or refreshing the "coverage" attribute of every feature that
# carries an "nbElements" attribute.
my ($fileName, $length, $outFileName) = ("", 38, "coverageOut.gff3") ;

# compute_coverage($nbElements, $start, $end, $readLength)
# Returns readLength * nbElements / (end - start + 1), rounded to two
# decimals, or nothing when the feature span is not positive (which would
# otherwise divide by zero or yield a negative coverage).
sub compute_coverage {
    my ($nb_elements, $start, $end, $read_length) = @_;

    my $span = $end - $start + 1;
    return if $span <= 0;

    my $cover = $read_length * $nb_elements / $span;
    return int(100 * $cover + 0.5) / 100;    # round to 2 decimals
}

# add_coverage($line, $readLength)
# Returns $line with its "coverage" attribute set from the "nbElements"
# attribute and the start/end columns (4th and 5th tab-separated fields).
# The line is returned unchanged when it has no "nbElements=<digits>", when
# start/end are not plain integers, or when the span is not positive.
sub add_coverage {
    my ($line, $read_length) = @_;

    my ($nb_elements) = $line =~ /nbElements=(\d+)/;
    return $line unless defined $nb_elements;

    my @columns = split /\t/, $line;
    my ($start, $end) = @columns[3, 4];
    return $line
        unless defined $start && defined $end
            && $start =~ /\A\d+\z/ && $end =~ /\A\d+\z/;

    my $cover = compute_coverage($nb_elements, $start, $end, $read_length);
    return $line unless defined $cover;

    if ($line =~ /coverage=/) {
        # Replace only the value of the existing attribute: stop at the next
        # ';' (or end of line) so the following attributes are preserved.
        $line =~ s/coverage=[^;\r\n]*/coverage=$cover/;
    } elsif ($line =~ /;/) {
        # Insert the new attribute right after the first one.
        $line =~ s/;/;coverage=$cover;/;
    } else {
        # Single attribute without any ';': append before the line ending.
        my $eol = $line =~ s/(\r?\n)\z// ? $1 : '';
        $line .= ";coverage=$cover$eol";
    }
    return $line;
}

# command line check
# Options are compared exactly and their values are consumed together with
# them, so a value such as "reads-list.gff" is never mistaken for "-l".
# Unknown arguments are ignored.
my @args = @ARGV;
while (@args) {
    my $arg = shift @args;
    if ($arg eq '-i' || $arg eq '--input') {
        die "Missing file name after $arg\n" unless @args;
        $fileName = shift @args;
    } elsif ($arg eq '-l' || $arg eq '--length') {
        die "Missing read length after $arg\n" unless @args;
        $length = shift @args;
    } elsif ($arg eq '-o' || $arg eq '--output') {
        # Recorded only: the result is always printed on STDOUT.
        $outFileName = shift @args;
    } elsif ($arg eq '-h' || $arg eq '--help') {
        # List form: $0 is not re-parsed by a shell.
        exec('pod2text', $0) or die "Can't run pod2text: $!\n";
    }
}

die "Invalid read length: \"$length\"\n"
    unless $length =~ /\A(?:\d+\.?\d*|\.\d+)\z/;
die "No gff input file given (use -i file.gff)\n" if $fileName eq "";

# informations retrieval and treatment, one line at a time
open(my $gff_fh, '<', $fileName)
    or die "Can't open gff file: \"$fileName\": $!\n";
while (my $line = <$gff_fh>) {
    print add_coverage($line, $length);
}
close $gff_fh;
exit(0);
