Repository 'mirplant2'
hg clone https://toolshed.g2.bx.psu.edu/repos/big-tiandm/mirplant2

Changeset 13:d1cc2e6ecf90 (2014-07-25)
Previous changeset 12:dc5a29826c7d (2014-07-25) Next changeset 14:554fbaf5f451 (2014-07-25)
Commit message:
Uploaded
added:
miRNA_Express_and_sequence.pl
b
diff -r dc5a29826c7d -r d1cc2e6ecf90 miRNA_Express_and_sequence.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/miRNA_Express_and_sequence.pl Fri Jul 25 05:21:10 2014 -0400
[
@@ -0,0 +1,173 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: Tian Dongmei
+#Email: tiandm@big.ac.cn
+#Date: 2014-6-4
+#Modified:
+#Description: solexa miRNA express and sequence
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+my %opts;
+GetOptions(\%opts,"i=s","list=s","fa=s","pre=s","tag=s","h");
+if (!(defined $opts{i} and defined $opts{list} and defined $opts{fa} and defined $opts{pre} and defined $opts{tag}) || defined $opts{h}) { #necessary arguments
+&usage;
+}
+
+my $filein=$opts{'i'};
+my $fileout=$opts{'list'};
+my $out=$opts{'fa'};
+my $preout=$opts{'pre'};
+
+=cut
+my %hash_pri;
+open PRI,"<$opts{p}";
+while (my $aline=<PRI>) {
+ chomp $aline;
+ if($aline=~/^>(\S+)/){$hash_pri{$1}=$aline;}
+}
+close PRI;
+=cut
+
+open IN,"<$filein"; #input file  
+open OUT,">$fileout"; #output file  
+open FA ,">$out";
+open PRE,">$preout";
+
+print OUT "#ID\tcoordinate\tpos1\tpos2";
+my @marks=split/\,/,$opts{'tag'};
+foreach  (@marks) {
+ print OUT "\t",$_,"_matureExp";
+}
+foreach  (@marks) {
+ print OUT "\t",$_,"_starExp";
+}
+foreach  (@marks) {
+ print OUT "\t",$_,"_totalExp";
+}
+
+print OUT "\n";
+
+my (%uniq_id,$novel);
+while (my $aline=<IN>) {
+ chomp $aline;
+ until ($aline =~ /^score\s+[-\d\.]+/){
+ $aline = <IN>;
+ if (eof) {last;}
+ }
+ if (eof) {last;}
+########## miRNA ID ################
+ $novel++;
+########### annotate####################
+ do {$aline=<IN>;} until($aline=~/flank_first_end/) ;
+ chomp $aline;
+ my @flank1=split/\t/,$aline;
+ do {$aline=<IN>;} until($aline=~/flank_second_beg/) ;
+ chomp $aline;
+ my @flank2=split/\t/,$aline;
+#
+########## mature start loop pre ####
+ do {$aline=<IN>;} until($aline=~/mature_beg/) ;
+ chomp $aline;
+ my @start=split/\t/,$aline;
+# $start[1] -=$flank1[1];
+ do {$aline=<IN>;} until($aline=~/mature_end/) ;
+ chomp $aline;
+ my @end=split/\t/,$aline;
+# $end[1] -=$flank1[1];
+ do {$aline=<IN>;} until($aline=~/mature_seq/) ;
+ chomp $aline;
+ my @arr1=split/\t/,$aline;
+ do {$aline=<IN>;} until($aline=~/pre_seq/) ;
+ chomp $aline;
+ my @arr2=split/\t/,$aline;
+ do {$aline=<IN>;} until($aline=~/pri_id/) ;
+ chomp $aline;
+ my @pri_id=split/\t/,$aline;
+ do {$aline=<IN>;} until($aline=~/pri_seq/) ;
+ chomp $aline;
+ my @pri_seq=split/\t/,$aline;
+ do {$aline=<IN>;} until($aline=~/star_beg/) ;
+ chomp $aline;
+ my @star_start=split/\t/,$aline;
+# $star_start[1] -=$flank1[1];
+ do {$aline=<IN>;} until($aline=~/star_end/) ;
+ chomp $aline;
+ my @star_end=split/\t/,$aline;
+# $star_end[1] -=$flank1[1];
+ do {$aline=<IN>;} until($aline=~/star_seq/) ;
+ chomp $aline;
+ my @arr3=split/\t/,$aline;
+ print OUT "miR-c-$novel\t$pri_id[1]\tmature:$start[1]:$end[1]\tstar:$star_start[1]:$star_end[1]\t";
+ #print OUT "$arr1[1]\t$arr3[1]\t$arr2[1]\t\/\t";
+ print FA ">miR-c-$novel\n$arr1[1]\n";
+ print PRE ">miR-c-$novel\n$pri_seq[1]\n";
+########## reads count #############
+ <IN>;
+ my @count1;my @count2;my @count3;my @count4;
+ $aline=<IN>;
+ do {
+ chomp $aline;
+ my @reads=split/\t/,$aline;
+ my @pos=();
+ $reads[5]=~/(\d+)\.\.(\d+)/;
+# $pos[0] =$1-$flank1[1];
+# $pos[1] =$2-$flank1[1];
+ $pos[0]=$1;
+ $pos[1]=$2;
+ $reads[0]=~/:([\d|_]+)_x(\d+)$/;
+ my @ss=split/_/,$1;
+ for (my $i=0;$i<@ss ;$i++) {
+ if (!(defined $count3[$i])) {
+ $count3[$i]=0;
+ }
+ if (!(defined $count4[$i])) {
+ $count4[$i]=0;
+ }
+ $count2[$i]+=$ss[$i];
+
+ }
+# $count3 +=$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 );
+# $count4 +=$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 );
+# $count1 =$1 if($end[1]-$pos[0]>=10 && $pos[1]-$start[1]>=10 && $count1<$1);
+# $count2 =$1 if($star_end[1]-$pos[0]>=10 && $pos[1]-$star_start[1]>=10 && $count2<$1);
+ if($end[1]-$pos[1]>=-5 && $end[1]-$pos[1]<=5 && $pos[0]-$start[1]>=-3 && $pos[0]-$start[1]<=3 )
+ {
+ for (my $i=0;$i<@ss;$i++) {
+ $count3[$i]+=$ss[$i];
+ }
+ }
+ if($star_end[1]-$pos[1]<=5 && $star_end[1]-$pos[1]>=-5 && $pos[0]-$star_start[1]>=-3 && $pos[0]-$star_start[1]<=3){
+ for (my $i=0;$i<@ss;$i++) {
+ $count4[$i]+=$ss[$i];
+ }
+ } 
+ $aline=<IN>;
+ chomp $aline;
+ } until(length $aline < 1) ;
+ $"="\t";
+ print OUT "@count3\t@count4\t@count2\n";
+ $"=" ";
+}
+
+close IN;
+close OUT;
+
+sub usage{
+print <<"USAGE";
+Version $version
+Usage:
+$0 -i -list -fa -pre -tag
+options:
+-i input file,predictions file
+-list output file miRNA list file
+-fa output file ,miRNA sequence fasta file.
+-pre output file, miRNA precursor fasta file.
+-tag string, sample names# eg: samA,samB,samC
+-h help
+USAGE
+exit(1);
+}
+