#!/usr/bin/perl -w
#Filename:
#Author: Tian Dongmei
#Email: tiandm@big.ac.cn
#Date: 2014-6-4
#Modified:
#Description: solexa miRNA express and sequence
#
# Provenance: this script was distributed as an HG changeset patch.
#   User:    big-tiandm
#   Date:    1406790466 14400 (Thu Jul 31 03:07:46 2014 -0400)
#   Node ID: feef3e20259126777dbf7a219054d81e583f0882
#   Parent:  b3f9565b30b42c8351aaa6dc29ac1b648e9dd16a
#   Message: Uploaded
#   File:    miRNA_Express_and_sequence.pl (new file, 173 lines)
#
# Overview
#   Reads a predictions file (-i).  Every record starts at a line matching
#   /^score\s+[-\d\.]+/ and is followed by tab-separated "tag<TAB>value"
#   lines and then a table of read rows terminated by an empty line.
#   For each record the script writes:
#     -list : one row with ID, pri_id, mature/star coordinates and the
#             per-sample mature, star and total read counts;
#     -fa   : a FASTA entry holding the mature_seq value;
#     -pre  : a FASTA entry holding the pri_seq value.
#   Records are named miR-c-1, miR-c-2, ... in order of appearance.

use strict;
use warnings;
use Getopt::Long;

my $version=1.00;

my %opts;
GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");

# Every one of these options is mandatory; -h always prints the usage text.
my @required = qw(i list fa pre tag);
if (defined $opts{h} or grep { !defined $opts{$_} } @required) {
    usage();
}

# Disabled code kept from the original (it was fenced off with =cut ... =cut).
# Note that no "p" option is declared in GetOptions above.
# my %hash_pri;
# open PRI,"<$opts{p}";
# while (my $aline=<PRI>) {
#     chomp $aline;
#     if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
# }
# close PRI;

open my $in, '<', $opts{i}
    or die "Cannot open input file '$opts{i}': $!\n";
open my $list_fh, '>', $opts{list}
    or die "Cannot open list output file '$opts{list}': $!\n";
open my $fa_fh, '>', $opts{fa}
    or die "Cannot open fasta output file '$opts{fa}': $!\n";
open my $pre_fh, '>', $opts{pre}
    or die "Cannot open precursor output file '$opts{pre}': $!\n";

# Header row: four fixed columns, then one mature/star/total column group
# with one column per sample name given in -tag.
my @samples = split /\,/, $opts{tag};
print {$list_fh} join("\t",
    '#ID', 'coordinate', 'pos1', 'pos2',
    (map { $_ . '_matureExp' } @samples),
    (map { $_ . '_starExp' }   @samples),
    (map { $_ . '_totalExp' }  @samples),
), "\n";

# Tags are consumed strictly in this order; each search continues from
# wherever the previous one stopped in the input.
my @field_tags = qw(
    flank_first_end flank_second_beg
    mature_beg mature_end mature_seq
    pre_seq pri_id pri_seq
    star_beg star_end star_seq
);

my $novel = 0;

RECORD:
while (defined(my $line = <$in>)) {
    # Skip forward to the next record start.
    next RECORD unless $line =~ /^score\s+[-\d\.]+/;

    # A score line with nothing after it cannot carry a record.
    last RECORD if eof($in);

    ########## miRNA ID ################
    $novel++;
    my $id = "miR-c-$novel";

    ########### annotate / mature start loop pre ####################
    my %value_of;
    for my $tag (@field_tags) {
        $value_of{$tag} = read_field($in, $tag, $id);
    }

    print {$list_fh} join("\t",
        $id,
        $value_of{pri_id},
        "mature:$value_of{mature_beg}:$value_of{mature_end}",
        "star:$value_of{star_beg}:$value_of{star_end}",
    ), "\t";
    print {$fa_fh}  ">$id\n$value_of{mature_seq}\n";
    print {$pre_fh} ">$id\n$value_of{pri_seq}\n";

    ########## reads count #############
    # One line sits between the star_seq line and the first read row and is
    # discarded unread (presumably a column header -- confirm against the
    # predictions file format).
    my $discarded = <$in>;

    my (@mature_count, @star_count, @total_count);

    ROW:
    while (defined(my $row = <$in>)) {
        chomp $row;

        # An empty line terminates the read table of this record.
        last ROW if length($row) < 1;

        my ($read_beg, $read_end, @per_sample) = parse_read_row($row);
        if (!defined $read_beg) {
            # Never fall back on stale capture variables from an earlier match.
            warn "Skipping unparsable read row in $id: $row\n";
            next ROW;
        }

        my $on_mature = matches_arm($read_beg, $read_end,
            $value_of{mature_beg}, $value_of{mature_end});
        my $on_star = matches_arm($read_beg, $read_end,
            $value_of{star_beg}, $value_of{star_end});

        for my $i (0 .. $#per_sample) {
            # Make sure a sample with no mature/star hits still prints as 0.
            $mature_count[$i] = 0 unless defined $mature_count[$i];
            $star_count[$i]   = 0 unless defined $star_count[$i];

            $total_count[$i]  += $per_sample[$i];
            $mature_count[$i] += $per_sample[$i] if $on_mature;
            $star_count[$i]   += $per_sample[$i] if $on_star;
        }
    }

    # Three tab-joined groups separated by tabs: mature, star, total.
    print {$list_fh} join("\t",
        join("\t", @mature_count),
        join("\t", @star_count),
        join("\t", @total_count),
    ), "\n";
}

close $in;
close $list_fh or die "Cannot close list output file '$opts{list}': $!\n";
close $fa_fh   or die "Cannot close fasta output file '$opts{fa}': $!\n";
close $pre_fh  or die "Cannot close precursor output file '$opts{pre}': $!\n";

# read_field($fh, $tag, $record_id)
#   Reads lines from $fh until one contains $tag, then returns the second
#   tab-separated column of that line (undef when the line has no second
#   column).  Dies when the input ends before the tag is seen, so truncated
#   input cannot cause an endless loop.
sub read_field {
    my ($fh, $tag, $record_id) = @_;

    while (defined(my $line = <$fh>)) {
        next if index($line, $tag) < 0;
        chomp $line;
        my @cols = split /\t/, $line;
        return $cols[1];
    }

    die "Unexpected end of input while looking for '$tag' in record $record_id\n";
}

# parse_read_row($row)
#   Splits one chomped read row on tabs.  Column 6 must contain the read
#   position as "<beg>..<end>"; column 1 must end in ":<c1>_<c2>_..._x<n>",
#   where the underscore-separated numbers are taken as the per-sample counts
#   (assumed to be in the same order as the -tag sample names -- confirm).
#   Returns ($beg, $end, @per_sample_counts), or an empty list when either
#   column is missing or does not match.
sub parse_read_row {
    my ($row) = @_;

    my @cols = split /\t/, $row;
    return unless defined $cols[0] && defined $cols[5];

    return unless $cols[5] =~ /(\d+)\.\.(\d+)/;
    my ($beg, $end) = ($1, $2);

    return unless $cols[0] =~ /:([\d|_]+)_x(\d+)$/;
    my @per_sample = split /_/, $1;

    return ($beg, $end, @per_sample);
}

# matches_arm($read_beg, $read_end, $arm_beg, $arm_end)
#   True when the read ends within 5 positions of the arm end and starts
#   within 3 positions of the arm start.
sub matches_arm {
    my ($read_beg, $read_end, $arm_beg, $arm_end) = @_;

    return abs($arm_end - $read_end) <= 5
        && abs($read_beg - $arm_beg) <= 3;
}

# usage()
#   Prints the help text to STDOUT and exits with status 1.
sub usage{
print <<"USAGE";
Version $version
Usage:
$0 -i <file> -list <file> -fa <file> -pre <file> -tag <string>
options:
-i input file,predictions file
-list output file miRNA list file
-fa output file ,miRNA sequence fasta file.
-pre output file, miRNA precursor fasta file.
-tag string, sample names# eg: samA,samB,samC
-h help
USAGE
exit(1);
}