Mercurial > repos > big-tiandm > mirplant2

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/miRNA_Express_and_sequence.pl	Thu Jul 31 03:07:46 2014 -0400
@@ -0,0 +1,173 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-6-4
+#Modified:
+#Description: solexa miRNA express and sequence
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
+if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'list'};
+my $out=$opts{'fa'};
+my $preout=$opts{'pre'};
+
+=cut
+my %hash_pri;
+open PRI,"<$opts{p}";
+while (my $aline=<PRI>) {
+	chomp $aline;
+	if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
+}
+close PRI;
+=cut
+
+open IN,"<$filein"; #input file
+open OUT,">$fileout"; #output file
+open FA ,">$out";
+open PRE,">$preout";
+
+print OUT "#ID\tcoordinate\tpos1\tpos2";
+my @marks=split/\,/,$opts{'tag'};
+foreach  (@marks) {
+	print OUT "\t",$_,"_matureExp";
+}
+foreach  (@marks) {
+	print OUT "\t",$_,"_starExp";
+}
+foreach  (@marks) {
+	print OUT "\t",$_,"_totalExp";
+}
+
+print OUT "\n";
+
+my (%uniq_id,$novel);
+while (my $aline=<IN>) {
+	chomp $aline;
+	until ($aline =~ /^score\s+[-\d\.]+/){
+		$aline = <IN>;
+		if (eof) {last;}
+	}
+	if (eof) {last;}
+########## miRNA ID ################
+	$novel++;
+########### annotate####################
+	do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
+	chomp $aline;
+	my @flank1=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
+	chomp $aline;
+	my @flank2=split/\t/,$aline;
+#
+########## mature start loop pre ####
+	do {$aline=<IN>;} until($aline=~/mature_beg/) ;
+	chomp $aline;
+	my @start=split/\t/,$aline;
+#	$start[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/mature_end/) ;
+	chomp $aline;
+	my @end=split/\t/,$aline;
+#	$end[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/mature_seq/) ;
+	chomp $aline;
+	my @arr1=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pre_seq/) ;
+	chomp $aline;
+	my @arr2=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pri_id/) ;
+	chomp $aline;
+	my @pri_id=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/pri_seq/) ;
+	chomp $aline;
+	my @pri_seq=split/\t/,$aline;
+	do {$aline=<IN>;} until($aline=~/star_beg/) ;
+	chomp $aline;
+	my @star_start=split/\t/,$aline;
+#	$star_start[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/star_end/) ;
+	chomp $aline;
+	my @star_end=split/\t/,$aline;
+#	$star_end[1] -=$flank1[1];
+	do {$aline=<IN>;} until($aline=~/star_seq/) ;
+	chomp $aline;
+	my @arr3=split/\t/,$aline;
+	print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
+	#print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
+	print FA ">miR-c-$novel\n$arr1[1]\n";
+	print PRE ">miR-c-$novel\n$pri_seq[1]\n";
+########## reads count #############
+	<IN>;
+	my @count1;my @count2;my @count3;my @count4;
+	$aline=<IN>;
+	do {
+		chomp $aline;
+		my @reads=split/\t/,$aline;
+		my @pos=();
+		$reads[5]=~/(\d+)\.\.(\d+)/;
+#		$pos[0] =$1-$flank1[1];
+#		$pos[1] =$2-$flank1[1];
+		$pos[0]=$1;
+		$pos[1]=$2;
+		$reads[0]=~/:([\d|_]+)_x(\d+)$/;
+		my @ss=split/_/,$1;
+		for (my $i=0;$i<@ss ;$i++) {
+			if (!(defined $count3[$i])) {
+				$count3[$i]=0;
+			}
+			if (!(defined $count4[$i])) {
+				$count4[$i]=0;
+			}
+			$count2[$i]+=$ss[$i];
+
+		}
+#		$count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
+#		$count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
+#		$count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
+#		$count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
+		if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
+		{
+			for (my $i=0;$i<@ss;$i++) {
+				$count3[$i]+=$ss[$i];
+			}
+		}
+		if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
+			for (my $i=0;$i<@ss;$i++) {
+				$count4[$i]+=$ss[$i];
+			}
+		}
+		$aline=<IN>;
+		chomp $aline;
+	} until(length $aline < 1) ;
+	$"="\t";
+	print OUT "@count3\t@count4\t@count2\n";
+	$"=" ";
+}
+
+close IN;
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -list -fa -pre -tag
+options:
+-i input file,predictions file
+-list output file miRNA list file
+-fa output file ,miRNA sequence fasta file.
+-pre output file, miRNA precursor fasta file.
+-tag string, sample names# eg: samA,samB,samC
+-h help
+USAGE
+exit(1);
+}
+