view SMART/bacteriaRegulatoryRegion_Detection/seedGff.pl @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl -w
###
# But : extension des UTR5 à partir des clusters de reads
# 
# Entrees : fichier gff annotation + cluster
#
# Sortie : UTR5.gff
#
###------------------------------------------------------      
use vars qw($USAGE);                      
use strict;   
use Getopt::Long;                            

=head1 NAME

seedGff.pl  

=head1 SYNOPSIS

% seedGff.pl -i annotation.gff -p BeginPosFromAtg [-l lgSeed | -e EndPosFromAtg ] [-o output.gff] [-h] 

=head1 DESCRIPTION

This script parses the input gff annotation file and writes, for each feature, a "5UTR" seed feature in gff3 format.

    -i|--input fileName  	    gff input file name of annotations
    -p|--pos BeginPosFromAtg 	greater positive number for the begin position of the seed from Atg 
   [-l|--length seedLength]   	length of the seed to compute (default 4nt; not implemented yet)
   [-e|--end seedEnd]  		    end of the seed to compute (smaller positive number)
   [-o|--output fileName]  	    gff output file name (default SEED.gff)
   [-h|--help]           	    help mode then die                              

=head1 AUTHOR - Claire Toffano-Nioche - mar.11
    from Claire Kuchly initial script

=cut
#-----------------------
# Option values filled from the command line: input gff path, begin and end
# offsets of the seed (-p / -e), seed length (-l) and output gff path (-o).
my ($inFileName, $beginSeed, $endSeed, $lgSeed, $outFileName) = ("", 0, 0, 0, "SEED.gff") ;
my $helpWanted = 0 ;

# Parse the command line with Getopt::Long (already loaded by this script).
# Option names are matched exactly, so a value such as "my-input.gff"
# (which contains "-i") can no longer be mistaken for an option, as it was
# when every element of @ARGV was tested against unanchored patterns.
GetOptions(
    'input|i=s'  => \$inFileName,
    'pos|p=i'    => \$beginSeed,
    'end|e=i'    => \$endSeed,
    'length|l=i' => \$lgSeed,
    'output|o=s' => \$outFileName,
    'help|h'     => \$helpWanted,
) or die "Error : invalid command line, try \"$0 --help\"\n" ;

if ($helpWanted) {
    # List-form exec bypasses the shell, so the script path is passed
    # verbatim; exec only returns when the command could not be started.
    exec('pod2text', $0) or die "Can't run pod2text: $!\n" ;
}

# Validate the options before touching any file, so that a wrong
# invocation does not create or truncate the output file.
if (($endSeed > 0) and ($lgSeed > 0)) {
    print STDERR "Error : only -e or -l definition, not both\n";
    # Show the usage, then report the failure through the exit status.
    system('pod2text', $0) ;
    exit(1) ;
}
elsif ($lgSeed > 0) {
    # The -l option is not implemented: only this message is printed and
    # the run continues with the current value of $endSeed.
    print "ERROR : Lg Seed => TODO \n";
}

die "Error : no gff input file given (-i)\n" if $inFileName eq "" ;

# Three-argument open: the file names are never interpreted as open modes.
open (INGFF, '<', $inFileName) or die "Can't open gff file: \"$inFileName\" ($!)\n" ;
open (UTR5, '>', $outFileName) or die "Error can't $outFileName open for output. $!\n";

    # Write one "5UTR" seed feature per annotation line of the input gff.
    # Original design note: the seed is used for the 5' clusters, which must
    # be anchored at -20 relative to the ATG, hence a seed spanning -22/-18.
    while (my $line = <INGFF>) {
        chomp($line);

        # Blank lines and gff comment/directive lines carry no feature.
        next if $line =~ /^\s*(?:#.*)?$/ ;

        # Limit -1 keeps trailing empty columns instead of dropping them.
        my @fields = split(/\t/, $line, -1) ;
        if (@fields < 7) {
            warn "Skipping line $. (no strand column): $line\n" ;
            next ;
        }
        # Pad missing trailing columns so the output always has 9 columns.
        push @fields, ("") x (9 - @fields) if @fields < 9 ;

        my ($start, $end, $strand) = @fields[3, 4, 6] ;
        my ($seedStart, $seedEnd) ;
        if ($strand eq "+") {
            # Forward strand: the seed lies before the feature start.
            $seedStart = $start - $beginSeed ;
            $seedEnd   = $start - $endSeed ;
        }
        elsif ($strand eq "-") {
            # Reverse strand: the seed lies after the feature end.
            $seedStart = $end + $endSeed ;
            $seedEnd   = $end + $beginSeed ;
        }
        else {
            # Without a strand the seed side is unknown; writing the line
            # would produce a feature with empty coordinates.
            warn "Skipping line $. (strand is neither + nor -): $line\n" ;
            next ;
        }

        # Negative coordinates are clamped to 0, as before.
        # NOTE(review): gff coordinates are 1-based, so 0 may itself be out
        # of range for downstream tools -- confirm whether 1 is expected.
        $seedStart = 0 if $seedStart < 0 ;
        $seedEnd   = 0 if $seedEnd < 0 ;

        print UTR5 join("\t", @fields[0, 1], "5UTR", $seedStart, $seedEnd, @fields[5 .. 8]), "\n" ;
    }
    close INGFF;
    # Buffered write errors only surface when the handle is closed.
    close UTR5 or die "Error while closing the output file: $!\n" ;
exit(0);