view check2.pl @ 6:f1696b304b8d draft default tip

Uploaded
author mkhan1980
date Thu, 25 Apr 2013 11:22:23 -0400
parents 2cceb9398d33
children
line wrap: on
line source

  #!/usr/bin/perl -w
use strict;

# Lexical variables used by the script. They are all declared up front so
# that every later reference satisfies "use strict".

# Arrays and the hash that start out explicitly empty.
my @temp       = ();
my @digit      = ();
my @scorearray = ();
my @result     = ();
my @c          = ();
my %final      = ();

# Matrix storage; @matrix1 is filled further down in the script.
my (@matrix1, @matrix2);

# Intermediate values of the per-base score formula and its final result.
my ($result1, $result2, $result3, $result4, $result5, $result6, $resultfinal);

# Counters.
my ($count, $coun, $cou, $count2, $kount);

# Miscellaneous scalars.
my ($digit, $marks, $log, $scorearray, $percent, $result, $final, $matrix1, $matrix2);

# Index into the flat matrix; starts at the first position.
my $coll = 0;

# Text holding the candidate windows, and the two fields read from the
# coordinates file.
my ($var, $entry1, $entry2);

# "rev"-prefixed counterparts of the variables above.
# NOTE(review): none of these is referenced in the part of the script shown
# here - presumably reserved for a reverse-strand pass; confirm before removing.
my ($reventry1, $reventry2, $revvar);
my (@revmatrix1, @revtemp, @revdigit, @revscorearray);
my ($revkount, $revcoun, $revcount2, $revcoll, $revdigit, $revmarks, $revscorearray);
my ($revresult1, $revresult2, $revresult3, $revresult4, $revresult5, $revresult6, $revresultfinal);



# Open the three files named on the command line and read both inputs.
#   $ARGV[0]  sequence file    - all lines are read into @array5
#   $ARGV[1]  coordinates file - all lines are read into @coordinates
#   $ARGV[2]  report file      - created/truncated, written through OUT
# The bareword handles IN, IN2 and OUT are kept because the rest of the
# script prints to and closes them under those names.
die "Usage: $0 <sequence file> <coordinates file> <output file>\n"
    unless @ARGV >= 3;

# Three-argument open keeps odd characters in a file name from being read as
# an open mode, and each call is checked so a missing or unreadable file
# stops the run instead of silently producing an empty report.
open(IN,  '<', $ARGV[0]) or die "Cannot open '$ARGV[0]' for reading: $!\n";
open(IN2, '<', $ARGV[1]) or die "Cannot open '$ARGV[1]' for reading: $!\n";
open(OUT, '>', $ARGV[2]) or die "Cannot open '$ARGV[2]' for writing: $!\n";

# Read both inputs completely, one array element per line.
my @array5 = <IN>;

my @coordinates = <IN2>;


# Split the chromosome number and starting position from the coordinates file
# into two separate strings: the first whitespace-separated field of a line
# goes to $entry1 and the second to $entry2. Every usable line overwrites the
# previous values, so the last usable line wins.

foreach my $coordinates (@coordinates) {

    chomp($coordinates);

    # split ' ' skips leading whitespace, so an indented line does not
    # produce an empty first field.
    my @coordinates2 = split(' ', $coordinates);

    # Ignore blank or incomplete lines; otherwise a trailing empty line in
    # the file would replace valid values with undef.
    next if @coordinates2 < 2;

    ($entry1, $entry2) = @coordinates2[0, 1];

}


# Column headings of the tab-separated report.
print OUT join("\t", "CTCF Site", "Chromosome no.", "Start", "End", "Score", "Strand"), "\n";

# Drop the line endings and glue all input lines into one continuous string.
chomp @array5;
my $digits = join "", @array5;

# Because the 19-character capture sits inside a lookahead, the regex engine
# advances only one character per match, so this collects the overlapping
# 19-character word starting at every offset.
my @yeslap = ($digits =~ /(?=(\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w\w))/g);

# All windows as one space-separated, newline-terminated string.
$var = join(" ", @yeslap) . "\n";


# CTCF position frequency matrix, stored flat. Each row below is one motif
# position; the scoring loop reads the four counts of a position at offsets
# 0, 1, 2 and 3 for A, C, G and T respectively. There are 19 positions.
@matrix1 = qw(
     87.25 291.25  76.25 459.25
    167.25 145.25 414.25 187.25
    281.25  49.25 449.25 134.25
     56.25 800.25  21.25  36.25
      8.25 903.25   0.25   2.25
    744.25  13.25  65.25  91.25
     40.25 528.25 334.25  11.25
    107.25 433.25  48.25 324.25
    851.25  11.25  32.25  18.25
      5.25   0.25 903.25   3.25
    333.25   3.25 566.25   9.25
     54.25  12.25 504.25 341.25
     12.25   0.25 890.25   8.25
     56.25   8.25 775.25  71.25
    104.25 733.25   5.25  67.25
    372.25  13.25 507.25  17.25
     82.25 482.25 307.25  37.25
    117.25 322.25  73.25 396.25
    402.25 181.25 266.25  59.25
);

# Reset the match counters.
($kount, $coun) = (0, 0);

# Pattern for a CTCF candidate: 19 consecutive characters, each one of
# A, T, G, C or N. Because of the pseudocount, a wildcard is allowed at
# each position.
my $pattern = "[ATGCN]" x 19;
	   
# Compare the pattern with the 19 bp nucleotide segments held in $var, score
# each matching segment against the position frequency matrix, and write the
# segments whose total score reaches the threshold to OUT.
#
# Reads : $var, $pattern, @matrix1, $entry1, $entry2 (set earlier in the script)
# Writes: one tab-separated line per reported site to OUT;
#         $count2 is incremented once per matching segment.

# Background frequency of each base, used in the log-odds ratio.
my %background_for = (A => 0.3, C => 0.2, G => 0.2, T => 0.3);

# Offset of each base within the four counts stored for a matrix position.
my %column_for = (A => 0, C => 1, G => 2, T => 3);

# 914 equals the sum of the four counts stored for a position in @matrix1.
# Its square root scales the pseudocount added to every count.
my $site_total   = 914;
my $pseudo_scale = sqrt($site_total);
my $denominator  = $pseudo_scale + $site_total;
my $log_of_two   = log(2);

# Minimum total score a segment needs in order to be reported.
my $min_score = 18;

# _score_window($window, $matrix_ref)
#   $window     - string of bases; character i is scored against matrix
#                 position i
#   $matrix_ref - reference to the flat matrix (four counts per position,
#                 in the order A, C, G, T)
# Returns the sum over all positions of
#   log2( (count + sqrt(914) * background) / (914 + sqrt(914)) / background )
# Bases are upper-cased before lookup, because the segment pattern is matched
# case-insensitively and lower-case bases must score like upper-case ones.
# Any character other than A, C, G or T (for example N) adds nothing.
sub _score_window {
    my ($window, $matrix_ref) = @_;

    my $score = 0;
    for my $position (0 .. length($window) - 1) {
        my $base = uc substr($window, $position, 1);
        next unless exists $column_for{$base};

        my $count      = $matrix_ref->[4 * $position + $column_for{$base}];
        my $background = $background_for{$base};
        my $numerator  = $pseudo_scale * $background + $count;

        $score = $score + log($numerator / $denominator / $background) / $log_of_two;
    }

    return $score;
}

while ($var =~ m/($pattern)/gi) {
    # The capture is the matched 19-character segment, in its original case.
    my $consensus = $1;

    # Running number of matched segments; it shifts the reported coordinates.
    $count2++;

    my $fpercent = _score_window($consensus, \@matrix1);

    if ($fpercent >= $min_score) {
        # Columns: site, chromosome, start, end, score, strand.
        print OUT join("\t",
            $consensus,
            $entry1,
            $entry2 - 18 - $count2,
            $entry2 - $count2,
            $fpercent,
            "-"), "\n";
    }
}


# Close the report first and check the result: output is buffered, so a
# write failure (for example a full disk) may only be reported at close time.
close(OUT) or die "Error while closing the output file: $!\n";

# Nothing was written through the input handles, so their close is unchecked.
close(IN);
close(IN2);