# HG changeset patch # User big-tiandm # Date 1415239775 18000 # Node ID 9dcffd531c763a90351fba1f8d8e33f3e5a74017 # Parent 2ed1e1728299b837f2aa70d0cef25d60388f1ff1 Uploaded diff -r 2ed1e1728299 -r 9dcffd531c76 Annotate.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Annotate.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,178 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Chentt +#Email: chentt@big.ac.cn +#Date: 2014/4/10 +#Modified: +#Description: cluster annotate by priority +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h"); +if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} +#my $genelistout=$opts{'t'}; +my $dis=defined $opts{'d'}? $opts{'d'}:1000; +my %gene; + +#open OUT,">$genelistout"; #output file +#print OUT "#ID\tchr\tstart\tend\tstrand\ns"; +open IN,"<$opts{g}"; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @tmp=split/\t/,$aline;#ID chr start end strand + #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; + $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]]; +} +#while (my $aline=) { +# chomp $aline; +# next if($aline=~/^\#/); +# my @tmp=split/\t/,$aline; +# my $ID; +# if ($tmp[2] eq "gene") { +# $tmp[0]=~s/Chr\./Chr/; +# $tmp[0]=~s/Chr/chr/; +# my @infor=split/;/,$tmp[8]; +# for (my $i=0;$i<@infor ;$i++) { +# if ($infor[$i]=~/Alias\=(\S+)$/) { +# $ID=$1; +# last; +# } +# } +# $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] +# print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; +# } +#} +close IN; +#close OUT; + + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; + +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file +while (my $aline=) { + chomp $aline; + my @tmp=split/\t/,$aline; + if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next} + my @result; + #shift @tmp; + my @id=split/:/,$tmp[0]; + $id[0]=~s/Chr0/Chr/; + my @posi=split/-/,$id[1]; + foreach my $key (keys %{$gene{$id[0]}}) { + if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) { + push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}"; + next; + } + #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) { + if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) { + push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); + push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); + next; + } + #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) { + if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) { + push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); + push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); + next; + } + } + my $result; + if (!(@result)) { + $result="intergenic"; + } + elsif($#result==0){ + $result=$result[0]; + + } + else{ + $result=join "\t",@result; + } +# else{ +# my $te_num=0; +# my @te_overlap; +# my @te_up_down; +# my @non_overlap; +# my @non_up_down; +# for (my $k=0;$k<@result ;$k++) { +# my @rr=split/\;/,$result[$k]; +# if ($rr[3] eq "Y") { +# $te_num++; +# if ($rr[0] eq "overlap") { +# push @te_overlap,$result[$k]; +# } +# else{ +# push @te_up_down,$result[$k]; +# } +# } +# else{ +# if ($rr[0] eq "overlap") { +# push @non_overlap,$result[$k]; +# } +# else{ +# push @non_up_down,$result[$k]; +# } +# } +# } +# if ($te_num==0) {#non TE +# if (!(@te_overlap)) {#down up +# if ($#non_up_down==0) { +# $result=$non_up_down[0]; +# } +# else{#overlap +# my $all_2=join "\t",@non_up_down; +# $result="up&down1-kb\t".$all_2; +# } +# } +# else{ +# $result=join "\t",@non_overlap; +# if ($#non_overlap>=1) { +# print "$aline\t$result\n"; +# } +# } +# } +# else{#TE +# if (!(@te_overlap)) {#down up +# if ($#te_up_down==0) { +# $result=$te_up_down[0]; +# } +# else{#overlap +# my $all_2=join "\t",@te_up_down; +# $result="up&down1-kb\t".$all_2; +# } +# } +# else{ +# $result=join "\t",@te_overlap; +# if ($#te_overlap>=1) { +# print "$aline\t$result\n"; +# } +# } +# } +# } + print OUT "$aline\t$result\n"; +} + +close IN; +close OUT; +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -g -d +options: +-i input file +-g genelist file +-d int the length of the upstream and downstream,default 1000 +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 ClassAnnotate.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ClassAnnotate.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,251 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Chen Tingting +#Email: chentt@big.ac.cn +#Date: 2014/5/13 +#Modified: +#Description: cluster annotate +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","g=s","n=s","r=s","p=s","o=s","t=s","l=s","h"); +if (!(defined $opts{i} and defined $opts{g} and defined $opts{n} and defined $opts{r} and defined $opts{p} and defined $opts{o} and defined $opts{t} and defined $opts{l}) || defined $opts{h}) { #necessary arguments +&usage; +} + +#my %gene; +my %gene1; +open IN,"<$opts{g}"; +open OUT ,">$opts{l}"; +print OUT "#ID\tchr\tstart\tend\tstrand\n"; +my $n=1; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @tmp=split/\t/,$aline; + my $ID; + if ($tmp[2] eq "gene") { + $tmp[0]=~s/Chr\./Chr/; + #$tmp[0]=~s/Chr/chr/; + my @infor=split/;/,$tmp[8]; + for (my $i=0;$i<@infor ;$i++) { + if ($infor[$i]=~/Alias\=(\S+)$/) { + $ID=$1; + last; + } + else { + $ID="unknown$n"; + $n++; + } + } + #$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] + push @{$gene1{$ID}},[$tmp[3],$tmp[4],$tmp[0]]; + print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; + } +} +#while (my $aline=) { +# chomp $aline; +# next if($aline=~/^\#/); +# my @tmp=split/\t/,$aline;#ID chr start end strand +# push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; +# #$gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[1]]; +#} +close IN; +close OUT; + +my %nat; +open TMP,">$opts{t}"; +print TMP "#NAT_ID\tGene\tStrand\tChr\tGene_start\tGene_end\tAntiGene\tStrand\tChr\tAntiGene_start\tAntiGene_end\tType1\tType2\tNATS1_start\tNATS1_end\tNATS2_start\tNATS2_end\n"; + +open IN,"<$opts{n}"; +my $nat_n=1; +while (my $aline=) { + next if($aline=~/^\#/);#osj LOC_Os05g02659 - LOC_Os01g24200 + trans 1559 1802 660 905 246 100nt - + chomp $aline; + my @arr=split /\t/,$aline; + my ($ns,$ne,$ns2,$ne2)=(0,0,0,0); + if ($arr[11]=~/Nearby/) { + ($ns,$ne)=&nearby($gene1{$arr[1]}[0][0],$gene1{$arr[1]}[0][1],$gene1{$arr[3]}[0][0],$gene1{$arr[3]}[0][1]); + push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs".$nat_n]; + print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns\t$ne\n"; + $nat_n++; + }else{ + $ns=$gene1{$arr[1]}[0][0]+$arr[6]-1; + $ne=$gene1{$arr[1]}[0][0]+$arr[7]-1; + $ns2=$gene1{$arr[3]}[0][1]-$arr[9]+1; + $ne2=$gene1{$arr[3]}[0][1]-$arr[8]+1; + push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs$nat_n"."_1"];#start,end,class1,class2 + push @{$nat{$gene1{$arr[3]}[0][2]}},[$ns2,$ne2,$arr[5],$arr[11],"NATs$nat_n"."_2"]; + print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns2\t$ne2\n"; + $nat_n++; + } +} +close IN; +close TMP; + +my %repeat; +open IN,"<$opts{'r'}"; +my $first=; +$first=; +$first=; +while (my $aline=) { + chomp $aline; + $aline=~s/^\s+//; + my @tmp=split/\s+/,$aline; + $tmp[4]=~s/chr0/Chr/; + $tmp[4]=~s/chr/Chr/; + push @{$repeat{$tmp[4]}},[$tmp[5],$tmp[6],$tmp[10]];#start,end,class + #print "$tmp[4]\t$tmp[5]\t$tmp[6]\t$tmp[10]\n"; +} +close IN; + +my %phase; +open IN,"<$opts{'p'}"; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @tmp=split/\t/,$aline; + if ($tmp[5]>=25) { + $phase{$tmp[0]}=$tmp[5]; + } +} +close IN; + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file +while (my $aline=) { + chomp $aline; + if($aline=~/^\#/){ + print OUT "$aline\tPhase\tLong\tRepeatClass\tNatClass1\tNatClass2\tNatID\n"; + next; + } + my @tmp=split/\t/,$aline; + my @inf=split/\:/,$tmp[0]; + my @pos=split/\-/,$inf[1]; + my $chr=$inf[0]; + $chr=~s/Chr0/Chr/; + my $start=$pos[0]; + my $end=$pos[1]; + #=========Repeat============ + my @repeat; + if (defined(@{$repeat{$chr}})) { + my @r_array=sort {$a->[0] <=> $b->[0]} @{$repeat{$chr}}; + for (my $i=0;$i<@r_array ;$i++) { + if ($start<$r_array[$i][0] && $end>$r_array[$i][0]) { + push @repeat,$r_array[$i][2]; + } + elsif($start>$r_array[$i][0] && $start<$r_array[$i][1]){ + push @repeat,$r_array[$i][2]; + + } + elsif($end<$r_array[$i][0]){ + last; + } + else{next;} + } + } + my $repeat; + if (@repeat==0) { + $repeat="\/"; + } + else{ + $repeat=join ";",@repeat; + } + #=========nat=============== + my @nat1;#class 1 + my @nat2;#class 2 + my @nat;#nat ID + #foreach my $chr (keys %nat) { + my @n_array=sort {$a->[0] <=> $b->[0] } @{$nat{$chr}}; + for (my $i=0;$i<@n_array ;$i++) { + if ($start<$n_array[$i][0] && $end>$n_array[$i][0]) { + push @nat1,$n_array[$i][2]; + push @nat2,$n_array[$i][3]; + push @nat,$n_array[$i][4]; + } + elsif($start>$n_array[$i][0] && $start<$n_array[$i][1]){ + push @nat1,$n_array[$i][2]; + push @nat2,$n_array[$i][3]; + push @nat,$n_array[$i][4]; + } + elsif($end<$n_array[$i][0]){ + last; + } + else{next;} + } + #} + + my $nat1; + my $nat2; + my $nat; + if (@nat1==0) { + $nat1="\/"; + } + else{ + $nat1=join ";",@nat1; + } + if (@nat2==0) { + $nat2="\/"; + } + else{ + $nat2=join ";",@nat2; + } + if (@nat==0) { + $nat="\/"; + } + else{ + $nat=join ";",@nat; + } + #========phase============== + my $phase="\/"; + if (defined($phase{$tmp[0]})) { + $phase="phase"; + } + #=========long=============== + my $long="\/"; + if ($tmp[1] eq "\>30nt" and $tmp[2]>=0.5) { + $long="long"; + } + print OUT "$aline\t$phase\t$long\t$repeat\t$nat1\t$nat2\t$nat\n"; +} + +close IN; +close OUT; + +sub nearby{ + my @p=@_; + my ($s,$e)=(0,0); + if ($p[1]<$p[2]) { + $s=$p[1]; + $e=$p[2]; + }else{ + $s=$p[3]; + $e=$p[0]; + } + return ($s,$e); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -g -n -r -p -t -l +options: +-i input file + -g gff file + -n NATs file + -r repeat file + -p phase file +-o output file +-t nat output file +-l genelist output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 DEGseq_2.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/DEGseq_2.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,73 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2009-05-06 +#Modified: +#Description: 删除matched reads +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h"); +if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $outputdir=$opts{'outdir'}; +unless ($outputdir=~/\/$/) {$outputdir .="/";} +my $column1=defined $opts{column1} ? $opts{column1} : 3; +my $column2=defined $opts{column2} ? $opts{column2} : 4; +my $mark1=$opts{mark1}; +my $mark2=$opts{mark2}; +my $fileout=$outputdir."degseq.R"; + +open OUT,">$fileout"; #output file +#my ($name,$dir); +#$name=basename($filein); +print OUT "library(DEGseq)\n"; +print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n"; +print OUT "geneExpFile<-file.path(\"$filein\")\n"; +print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n"; +print OUT "outputdir<-file.path(\"$outputdir\")\n"; +print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n"; +print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n"; +if(defined $opts{'depth1'} && defined $opts{'depth2'}){ +print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n"; +} +else{ +print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n"; +} +close OUT; + + +system("R CMD BATCH $fileout"); + +wait; + + + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2 +options: +-i input file +-outdir output file dir +-column1 the first column for DEGseq +-mark1 the name of the column1 +-depth1 depth for the first file,use for normalize +-column2 the second column for DEGseq +-mark2 the name of the column2 +-depth2 depth for the second file,use for normalize + +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 Length_Distibution.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Length_Distibution.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,219 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. input file of Gene region annotation which format like GenePred +# 2. input file of Transcripts region annotation which format like GenePred +# 3. input file of gene snp detail info +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"i=s","o=s",,"h"); +if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) { +&usage; +} +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + #'stroke-width2'=>1 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + 'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family2'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#========================================data============================ +open(IN,"$opt{i}")||die"cannot open the file $opt{i}"; +my @R_length; +my @T_length; +my $R_number=0; +my $T_number=0; +my $R_max=0; +my $T_max=0; + +my $title=; +chomp $title; +my @title=split/\t/,$title; +my @mark=split/\s+/,$title[1]; +my $sample_number=@mark; +while (my $aline=) { + if ($aline=~/^\s/) { + my $T_title=; + chomp $T_title; + while (my $a_aline=) { + chomp $a_aline; + my @temp=split/\t/,$a_aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($R_max<$number[$i]) { + $R_max=$number[$i]; + } + } + push @R_length,[$temp[0],@number]; + $R_number++; + } + } + else { + chomp $aline; + my @temp=split/\t/,$aline; + my @number=split/\s+/,$temp[1]; + for (my $i=0;$i<@number ;$i++) { + if ($T_max<$number[$i]) { + $T_max=$number[$i]; + } + } + push @T_length,[$temp[0],@number]; + $T_number++; + } +} +close IN; +print "Tag max: $T_max\nRead max: $R_max\n"; +my $kd_number=5; +##=======================Reads 纵坐标刻度========================== +my $r=1; +my $rr=1; +my $R=$R_max; +while ($R>10) { + $R=$R/10; + $r=$r*10; + $rr++; +} +$R=int($R+0.5); +my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小(一共10格) +my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字 +##=======================Tags 纵坐标刻度========================== +my $t=1; +my $tt=1; +my $T=$T_max; +while ($T>10) { + $T=$T/10; + $t=$t*10; + $tt++; +} +$T=int($T+0.5); +my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小(一共10格) +my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字 + +#############################s#define start coordinate and scale +my $XOFFSET=50; +my $YOFFSET=60; +my $width=800; +my $heigth=800; +my $X_width=600; +#my $height=1600; +#### Starting #### +#新建画布 +my $svg=SVG->new(width=>$width,height=>$heigth); +####坐标轴 +my $axisL=300;#read 纵坐标长度 +my $x_margin = 50; +#=========Reads number setting========================================== +my $Y_R_title=30;#标题的纵向宽度 +my $Y_R_0=$YOFFSET+$axisL+$Y_R_title; +my $X_R_0=$XOFFSET+$x_margin; +my $R_Yscale=$axisL/$R_xg/$kd_number; +my $R_Xscale=$X_width/$R_number/($sample_number+1); +#=====================================Reads Y axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg); +} +#=====================================Reads X axis====================== +$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$R_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$YOFFSET,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} +####================================================================================== +#=========================================Tag s +my $Y_T_title=30;#标题的纵向宽度 +my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size +my $X_T_0=$XOFFSET+$x_margin; +my $T_Yscale=$axisL/$T_xg/$kd_number; +my $T_Xscale=$X_width/$T_number/($sample_number+1); +#=====================================Tags Y axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +for (my $i=1;$i<$kd_number ;$i++) { + $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg); +} +#=====================================Tags X axis====================== +$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + +#print "$R_number\t$sample_number\n"; +for ($i=0;$i<$T_number ;$i++) { + for (my $j=1;$j<$sample_number+1 ;$j++) { + my $red=$j/$sample_number*255; + $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + } + $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]); +} +#===Reads number title +$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution"); +#===Reads +for (my $i=0;$i<$sample_number ;$i++) { + my $red=($i+1)/$sample_number*255; + $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); + $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); +} + + + + +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-i +-o svg output +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 SampleDEGseqMerge.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SampleDEGseqMerge.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,94 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: chentt@big.ac.cn +#Date: 2014-05-21 +#Modified: +#Description: merged deg file and total information +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i:s@","mark:s@","f:s","o=s","n=s","h"); +if (!(defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my @filein=@{$opts{'i'}}; +my @mark=@{$opts{'mark'}}; +my $fileout=$opts{'o'}; +my $number=$opts{'n'}; + +my %hash; +open IN,"<$filein[0]"; #input file + +while (my $aline=) { + chomp $aline; + next if($aline=~/^\"/); + my @temp=split/\t/,$aline; + $hash{$temp[0]}=$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1]; +} +close IN; + +for (my $i=1;$i<=$#filein;$i++) { + open IN,"<$filein[$i]"; #input file + + while (my $aline=) { + chomp $aline; + next if($aline=~/^\"/); + my @temp=split/\t/,$aline; + if (!(defined $hash{$temp[0]})) { + print "Not find $temp[0]in sample one!\n"; + next; + } + $hash{$temp[0]} .="\t".$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1]; + } + close IN; +} + +open OUT,">$fileout"; #output file +my $deg_title; +foreach (@mark) { + $deg_title.="log2(Fold_change)\tp_value\tq_value\t".$_."\t"; +} +$deg_title=~s/\s+$//; +my %function; +my $title; +open F,"<$opts{f}"; +while (my $aline=) { + chomp $aline; + if($aline=~/^\#/){ + my $title=$aline; + my @title=split/\t/,$aline; + $title[2+$number].="\t".$deg_title; + $title=join"\t",@title; + print OUT "$title\n"; + next; + } + my @temp=split/\t/,$aline; + $temp[2+$number].="\t".$hash{$temp[0]}; + my $temp=join"\t",@temp; + print OUT "$temp\n"; + +} +close F; +close OUT; + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -mark -f +options: +-i input file # -i output_score.txt -i output_score.txt -i output_score.txt +-mark sample name # -mark sam1_VS_sam2 -mark sam1_VS_sam3 -mark sam2_VS_sam3 +-f cluster file +-n sample number +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 bed2wig.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bed2wig.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Chentt +#Email: chentt@big.ac.cn +#Date: 2014/06/25 +#Modified: +#Description: get out larger than cut off sequence +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; +use FileHandle; + +my %opts; +GetOptions(\%opts,"i=s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $outputdir=$opts{'o'}; +unless ($outputdir=~/\/$/) {$outputdir .="/";} + +##############################get cmap################## +my %cmap; +open IN,"<$opts{i}"; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $cmap{$temp[0]}=$outputdir.$temp[0]; +} +close IN; +###########################split ma file###################### +my %handle; +foreach (keys %cmap) { + my $name=$cmap{$_}.".sam"; + open $handle{$_},">$name"; +} +open IN,"<$opts{i}"; +while (my $aline=) { + next if($aline=~/^\#/); + chomp $aline; + my @temp=split/\t/,$aline; + $handle{$temp[0]}-> print ($aline,"\n"); + +} +close IN; +foreach (keys %handle) {close $_;} + + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 buildInFont.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/buildInFont.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,10599 @@ +#!/usr/bin/perl -w +#Author:Li Shengting +#E-mail:lishengting@genomics.org.cn +#Program Date:2002-12-2015:53 +#Last Update:2006-11-14 0:19 +#Describe:add fonts defs to svg for batik +my $ver=1.00; # +use strict; +#use diagnostics; +#use Getopt::Long; + +###################################################################################################################### +# Usage +###################################################################################################################### +my $usage=<<"USAGE"; +#$ver Usage: buildInFont [font-name] +USAGE +my $argvNumber=3; +die $usage if (@ARGV<$argvNumber); +undef($usage); +undef($argvNumber); +###################################################################################################################### +#my %opts; +#GetOptions(\%opts,"a!","b:s"); +###################################################################################################################### +# Constant +###################################################################################################################### +#use constant PI => 3.1415926535897932384626433832795; +###################################################################################################################### +# Variable +###################################################################################################################### +my ($svgF,$fontF,$outSvg)=@ARGV; +my ($font,$defs); +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +# Begin +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +open(F,"$svgF") || die "Can't open $svgF!\n"; +open(O,">$outSvg") || die "Can't write $outSvg!\n"; +if ($fontF eq "x") { + $fontF=$0; +} +$defs=0; +while () { + next if (//); + next if (//); + next if (//); + next if (//); + next if (//); + print O; + if (/) { + if ($font=~//) { + $defs=1; + } + if ($defs) { + print O $font; + } + if ($font=~/<\/defs>/) { + $defs=0; + } + } + } +} +close(F); +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +# Subprogram +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +# End +#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +=pod + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +=cut diff -r 2ed1e1728299 -r 9dcffd531c76 chr_plot.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chr_plot.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,607 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. input file of Gene region annotation which format like GenePred +# 2. input file of Transcripts region annotation which format like GenePred +# 3. input file of gene snp detail info +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"g=s","l=s","chro=s","mark=s","span=s","te=s","t=s","cen=s","c=s","o=s","out=s","h"); +if (!(defined $opt{g} and defined $opt{c} and defined $opt{l} and defined $opt{chro} and defined $opt{mark} and defined $opt{o}) || defined $opt{h}) { +&usage; +} +my $span=$opt{span}; +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + csv=>{ + 'stroke'=>"red", + 'stroke-width'=>0.5 + }, + exon=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + intron=>{ + 'stroke'=>"black", + 'stroke-width'=>1.5 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + #'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#############################s#define start coordinate and scale + +my $length=$opt{"l"};#11 +#my @target_e_value=qw(1.78 1.83 1.92 2.00 1.92 2.00 1.93 2.11 2.05 2.03 1.92 1.91 1.54 1.72 1.67); + +my $chr=$opt{"chro"}; +my $start=1; +my $XOFFSET=50; +my $YOFFSET=60; +#my $length=$end-$start+1; +my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 +#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 +#========================================New canvas============================ +#--------------------------------------------------------------- +if (defined($opt{cen})) { + open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}"; + my %centromere; + while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $centromere{$temp[0]}[0]=$temp[1]; + $centromere{$temp[0]}[1]=$temp[2]; + } + close CEN; +} + +#### Starting #### +#新建画布 +my $svg=SVG->new(); +#画图起始点 +my $canvas_start_x=$XOFFSET; +my $canvas_end_x=$XOFFSET+$length*$Xscale;#按照比例尺 画线 +my $canvas_start_y=$YOFFSET; +my $canvas_end_y=$YOFFSET; +#Draw a straight line between two points start(x1,y1) and end(x2,y2). +#location attribute +$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +$long_scale=int ($length/10);#十等分 大刻度 +#大坐标刻度 +for ($i=0;$i<=10;$i++) { + my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale; + my $long_x_end=$long_x_start; + my $long_y_start=$YOFFSET; + my $long_y_end=$YOFFSET-5; + $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + my $Bscale=$start+$long_scale*$i; + my $cdata=int ($Bscale/1000000); + $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M"); +} + +if (defined($opt{cen})) { + $svg->rect('x',$XOFFSET+$centromere{$chr2}[0]*$Xscale,'y',$YOFFSET-2,'width',($centromere{$chr2}[1]-$centromere{$chr2}[0]+1)*$Xscale,'height',5,'stroke',"black",'stroke-width',"5",'fill',"black"); + + $svg->rect('x',$XOFFSET+$length*$Xscale+15,'y',$YOFFSET+20,'width',10,'height',5,'stroke',"black",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"black"); + $svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$YOFFSET+20+5,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"Centromere"); +} + +$svg->text('x',$XOFFSET+$length*$Xscale*0.42,'y',$YOFFSET-30,'stroke',"black",'stroke-width',1,'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); +#====================================================MAIN PROGRAM======================== + +#===========================================gene list data================================ +#open (TXT,">$opt{out}"); +#open(TE,"$opt{te}")||die"cannot open the file $opt{te}"; +#my %te; +#while (my $aline=) { +# chomp $aline; +# my @temp=split/\t/,$aline; +# if ($temp[2] eq "Y") { +# $te{$temp[0]}=1; +# } +#} +#close TE; +#===================================Exp gene list data ================================= +#open(TARGET,"$opt{t}")||die"cannot open the file $opt{t}"; +#my %target_e; +#my %target_rpkm; +#while (my $aline=) { +# chomp $aline;##Gene 19B1 1PA1 1LC1 29B2 2PA2 2LC2 39B3 3PA3 3LC3 93-4C PA-4C LY-4C 9343 PA43 LY43 19B1_VS_1LC1 19B1_VS_1PA1 1PA1_VS_1LC1 29B2_VS_2LC2 29B2_VS_2PA2 2PA2_VS_2LC2 39B3_VS_3LC3 39B3_VS_3PA3 3PA3_VS_3LC3 934C_LY4C 934C_PA4C PA4C_LY4C 9343_VS_LY43 9343_VS_PA43 PA43_VS_LY43 mpv_1 fold_1 mpv_2 fold_2 mpv_3 fold_3 mpv_4_B fold_4_B mpv_leaf fold_lead +# next if($aline=~/^\#/); +# my @temp=split/\t/,$aline; +# $target_rpkm{$temp[0]}="$temp[7]\t$temp[8]\t$temp[9]"; +# if ($temp[7]>$target_e_value[6]||$temp[8]>$target_e_value[7]||$temp[9]>$target_e_value[8]) { +# $target_e{$temp[0]}="$temp[7]\t$temp[8]\t$temp[9]"; +# } +#} +#close TARGET; +#==================================================================================== +my @genelist; +#my @te_genelist; +my @target_e; +open(GENE,"$opt{g}")||die"cannot open the file $opt{g}"; +while (my $aline=) { + chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 + + my @temp=split/\t/,$aline; + #my $te; + if ($temp[1]=~/^Chr(\d)$/) { + $temp[1]="Chr0$1"; + } + next if($temp[1] ne $chr); + #push @genelist,[$temp[0],$temp[3],$temp[4]]; + push @genelist,[$temp[0],$temp[2],$temp[3]]; +# if ($te{$temp[0]}) { +# push @te_genelist,[$temp[0],$temp[3],$temp[4]]; +# } +# else{ +# push @genelist,[$temp[0],$temp[3],$temp[4]]; +# } +# if ($target_e{$temp[0]}) { +# push @target_e,[$temp[0],$temp[3],$temp[4]]; +# } + +} +close GENE; +my @gene_desity; +#my @region_target_rpkm; +@genelist=sort{$a->[1] <=> $b->[1]}@genelist; +for (my $i=0;$i<@genelist ;$i++) { +# if ($genelist[$i][1]>($num1+1)*$span) { +# $num1++; +# } +# if ($genelist[$i][1]>$num1*$span&&$genelist[$i][1]<=($num1+1)*$span) { +# $gene_desity[$num1]++; +# +# } + my $start=int($genelist[$i][1]/$span); + my $end=int($genelist[$i][2]/$span); + #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]}; + if ($start==$end) { + $gene_desity[$start]++; + #for (my $rs=0;$rs<3 ;$rs++) { + #print TXT "$rs\t$i\t$genelist[$i][0]\t$target_rpkm{$genelist[$i][0]}\n"; + #$region_target_rpkm[$start][$rs]+=$t_rpkm[$rs]; + #} + + #$target_rpkm_desity[$start][0]+=$temp[0]; + #$target_rpkm_desity[$start][1]+=$temp[1]; + #$target_rpkm_desity[$start][2]+=$temp[2]; + } + else{ + for (my $k=$start;$k<=$end ;$k++) { + $gene_desity[$k]++; + #for (my $rs=0;$rs<3 ;$rs++) { + #$region_target_rpkm[$k][$rs]+=$t_rpkm[$rs]; + #} + #$target_rpkm_desity[$k][0]+=$temp[0]; + #$target_rpkm_desity[$k][1]+=$temp[1]; + #$target_rpkm_desity[$k][2]+=$temp[2]; + } + } +} +#------------------------------------------region_gene_number------------------------- +my $max_gene_number=0; +my $total=0; +for (my $i=0;$i<@gene_desity ;$i++) { + if (!(defined($gene_desity[$i]))) { + $gene_desity[$i]=0; + } + if ($gene_desity[$i]>$max_gene_number) { + $max_gene_number=$gene_desity[$i]; + } + #print TXT "$i\t$gene_desity[$i]\n"; + $total+=$gene_desity[$i]; +} +print "Gene max:$max_gene_number\ntotal:$total\n"; +my $abc=@genelist; +#my $cba=@te_genelist; +print "aaaaaa:$abc\n"; +#print "bbbbbb:$cba\n"; +#---------------------------------------------region_gene_rpkm------------------------ +#my $max_region_gene_rpkm=0; +#my @max_region_gene_rpkm=qw(0 0 0); +#my @total_region_gene_rpkm=qw(0 0 0); +#for (my $i=0;$i<@region_target_rpkm ;$i++) { +# for (my $s=0; $s<3;$s++) { +# +# if (!(defined($region_target_rpkm[$i][$s]))) { +# $region_target_rpkm[$i][$s]=0; +# } +# $total_region_gene_rpkm[$s]+=$region_target_rpkm[$i][$s]; +# if ($max_region_gene_rpkm<$region_target_rpkm[$i][$s]) { +# $max_region_gene_rpkm=$region_target_rpkm[$i][$s]; +# } +# if ($max_region_gene_rpkm[$s]<$region_target_rpkm[$i][$s]) { +# $max_region_gene_rpkm[$s]=$region_target_rpkm[$i][$s]; +# } +# } +#} +#my @ave_region_gene_rpkm=qw(0 0 0); +#my $max_ave_rpkm=0; +#for (my $i=0;$i<3;$i++) { +# $ave_region_gene_rpkm[$i]=$total_region_gene_rpkm[$i]/($#region_target_rpkm+1); +# if ($max_ave_rpkm<$ave_region_gene_rpkm[$i]) { +# $max_ave_rpkm=$ave_region_gene_rpkm[$i]; +# } +#} +# +#print "***max region gene rpkm :$max_region_gene_rpkm\n\n"; +#print "@max_region_gene_rpkm\n"; +#if ($max_region_gene_rpkm>10*$max_ave_rpkm) { +# $max_region_gene_rpkm=10*$max_ave_rpkm; +#} +#my @max_region_rpkm; +#---------------------------------------------------------------------------------- +#my @te_gene_desity; +#@te_genelist=sort{$a->[1] <=> $b->[1]}@te_genelist; +##my $num2=0; +#for (my $i=0;$i<@te_genelist ;$i++) { +## if ($te_genelist[$i][1]>($num2+1)*$span) { +## $num2=int($te_genelist[$i][1]/$span); +## } +## if ($te_genelist[$i][1]>$num2*$span&&$te_genelist[$i][1]<=($num2+1)*$span) { +## $te_gene_desity[$num2]++; +## } +# my $start=int($te_genelist[$i][1]/$span); +# my $end=int($te_genelist[$i][2]/$span); +# if ($start==$end) { +# $te_gene_desity[$start]++; +# } +# else{ +# for (my $k=$start;$k<=$end ;$k++) { +# $te_gene_desity[$k]++; +# } +# } +#} +#$max_te_gene_number=0; +#$total=0; +#for (my $i=0;$i<@te_gene_desity ;$i++) { +# if (!(defined($te_gene_desity[$i]))) { +# $te_gene_desity[$i]=0; +# } +# if ($te_gene_desity[$i]>$max_te_gene_number) { +# $max_te_gene_number=$te_gene_desity[$i]; +# } +# print TXT "$i\t$te_gene_desity[$i]\n"; +# $total+=$te_gene_desity[$i]; +#} +# +#print "TE gene max:$max_te_gene_number\ntotal:$total\n"; +#------------------------------------------------------- +#my @target_e_desity; +#@target_e=sort{$a->[1] <=> $b->[1]}@target_e; +#for (my $i=0;$i<@target_e ;$i++) { +# my $start=int($target_e[$i][1]/$span); +# my $end=int($target_e[$i][2]/$span); +# if ($start==$end) { +# $target_e_desity[$start]++; +# } +# else{ +# for (my $k=$start;$k<=$end ;$k++) { +# $target_e_desity[$k]++; +# } +# } +#} +#my $max_target_e_number=0; +#$total=0; +#for (my $i=0;$i<@target_e_desity ;$i++) { +# if (!(defined($target_e_desity[$i]))) { +# $target_e_desity[$i]=0; +# } +# if ($target_e_desity[$i]>$max_target_e_number) { +# $max_target_e_number=$target_e_desity[$i]; +# } +# print TXT "$i\t$target_e_desity[$i]\n"; +# $total+=$target_e_desity[$i]; +#} +# +#print "Target max:$max_target_e_number\ntotal:$total\n"; + +#====================================cluster data======================================= +open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; +my $mark="$opt{mark}"; +my @sample=split/\#/,$mark; +my $sample_num=@sample; +my @cluster; +my @cluster_density; +my @max_cluster_read=(0)x$sample_num; +while (my $aline=) { + next if($aline=~/^\#/); + chomp $aline;##ID chr strand start end 19B1 + #clusterID major_length percent sample1 sample2 sample3 + #Chr01:192429-192452 24nt 1.00 0.00 97222.22 0.00 + my @temp=split/\t/,$aline; + my @id=split/\:/,$temp[0]; + my @posit=split/\-/,$id[1]; + next if($id[0] ne $chr);#Chr.01 chr04 + push @cluster,[$id[0],$posit[0],$posit[1],@temp[3..3+$sample_num+1]]; + #push @cluster,[$temp[0],$temp[3],$temp[4],$temp[11],$temp[12],$temp[13]];#ID start end rpkm(19B1,1PA1,1LC1); + for (my $i=0;$i<@sample ;$i++) { + if($temp[3+$i]>$max_cluster_read[$i]){ + $max_cluster_read[$i]=$temp[3+$i]; + } + } +} +close CLUSTER; +@cluster=sort{$a->[1] <=> $b->[1]}@cluster; +for(my $i=0;$i<$#cluster;$i++) { + my $start=int($cluster[$i][1]/$span); + my $end=int($cluster[$i][2]/$span); + if ($start==$end) { + for (my $j=0;$j<$sample_num ;$j++) { + $cluster_density[$start][$j]+=$cluster[$i][$j+$sample_num]; + } + + } + else{ + for (my $m=$start;$m<=$end ;$m++) { + for (my $j=0;$j<$sample_num ;$j++) { + $cluster_density[$m][$j]+=$cluster[$i][$j+$sample_num]; + } + } + } +} +my @max_cluster_density=(0)x$sample_num; +my @total_cluster_density=(0)x$sample_num; +my $max_cluster_density=0; +for (my $i=0;$i<@sample;$i++) { + for (my $k=0;$k<$#cluster_density ;$k++) { + + if (!(defined($cluster_density[$k][$i]))) { + $cluster_density[$k][$i]=0; + } + $total_cluster_density[$i]+=$cluster_density[$k][$i]; + if ($cluster_density[$k][$i]>$max_cluster_density[$i]) { + $max_cluster_density[$i]=$cluster_density[$k][$i]; + } + if ($cluster_density[$k][$i]>$max_cluster_density) { + $max_cluster_density=$cluster_density[$k][$i]; + + } + } +} +my @ave_cluster_density=(0)x$sample_num; +my $max_ave=0; +for (my $i=0;$i<@sample;$i++) { + $ave_cluster_density[$i]=$total_cluster_density[$i]/($#cluster_density+1); + if ($max_ave<$ave_cluster_density[$i]) { + $max_ave=$ave_cluster_density[$i]; + } +} + +print "max cluster reads:@max_cluster_read\n"; +print "max cluster density:@max_cluster_density\n"; +if ($max_cluster_density>10*$max_ave) { + $max_cluster_density=10*$max_ave; +} +#===================================== plot gene desity ================================= +#===row +my $Y1scale=5; +my $y1_gene_density=$YOFFSET+$max_gene_number*$Y1scale+10; +for (my $i=0;$i<@gene_desity-1 ;$i++) { + my $density_start_x=$XOFFSET+$i*$span*$Xscale; + my $density_start_y=$y1_gene_density-$gene_desity[$i]*$Y1scale; + my $density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $density_end_y=$y1_gene_density-$gene_desity[$i+1]*$Y1scale; + my $heigth=$gene_desity[$i]/$max_gene_number; + $svg->rect('x',$density_start_x,'y',$density_start_y,'width',$span*$Xscale,'height',$y1_gene_density-$density_start_y,'stroke',"read",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + #$svg->line('x1',$density_start_x,'y1',$density_start_y,'x2',$density_end_x,'y2',$density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'}); + +} +$svg->line('x1',$XOFFSET,'y1',$y1_gene_density,'x2',$XOFFSET+$length*$Xscale,'y2',$y1_gene_density,'stroke',"black",'stroke-width',"black"); + +#====clomun +$svg->line('x1',$XOFFSET,'y1',$y1_gene_density,'x2',$XOFFSET,'y2',$y1_gene_density-$max_gene_number*$Y1scale,'stroke',"black",'stroke-width',"black"); +$svg->text('x',$XOFFSET-15,'y',$y1_gene_density,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',"0");#min gene number +$svg->text('x',$XOFFSET-20,'y',$y1_gene_density-$max_gene_number*$Y1scale,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_gene_number);#max gene number +#========================================================================================= +#=============================plot TE gene desity========================================= +#===row +=cut +my $Y2scale=$Y1scale; +my $y2_te_gene_density=$y1_gene_density; +for (my $i=0;$i<@te_gene_desity-1 ;$i++) { + my $te_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $te_density_start_y=$y2_te_gene_density+$te_gene_desity[$i]*$Y2scale; + my $te_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $te_density_end_y=$y2_te_gene_density+$te_gene_desity[$i+1]*$Y2scale; + #my $te_heigth=$te_gene_desity[$i]/$max_gene_number; + $svg->rect('x',$te_density_start_x,'y',$y2_te_gene_density,'width',$span*$Xscale,'height',$te_density_start_y-$y2_te_gene_density,'stroke',"read",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + #$svg->line('x1',$density_start_x,'y1',$density_start_y,'x2',$density_end_x,'y2',$density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'}); + +} +#column +$svg->line('x1',$XOFFSET,'y1',$y2_te_gene_density,'x2',$XOFFSET,'y2',$y2_te_gene_density+$max_te_gene_number*$Y2scale,'stroke',"black",'stroke-width',"black"); +$svg->text('x',$XOFFSET-20,'y',$y2_te_gene_density+$max_te_gene_number*$Y2scale,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_te_gene_number);#min gene number +=cut +#=======================gene density txt================================================== +my $md=$span/1000; +#$svg->text('x',$XOFFSET-30,'y',$YOFFSET+10,'width',"698",'height',"298",'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',"Number per \\30 kb","rotate","90",'writing-mode',"tb-rl"); + +$svg->rect('x',$XOFFSET+$length*$Xscale+15,'y',$y1_gene_density-$max_gene_number*$Y1scale*0.7,'width',10,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y1_gene_density-$max_gene_number*$Y1scale*0.7+5,'stroke',"green",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"Genes"); + +#$svg->rect('x',$XOFFSET+$length*$Xscale+15,'y',$y1_gene_density-$max_gene_number*$Y1scale*0.7+20,'width',10,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); +#$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y1_gene_density-$max_gene_number*$Y1scale*0.7+20+5,'stroke',"green",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"TE Genes"); + +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y1_gene_density-$max_gene_number*$Y1scale*0.7+20,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"genes number \/ $md kb"); +#========================================================================================= +#=============================plot exp gene desity========================================= +=cut +my $y3_target_e_density=$y2_te_gene_density+$max_te_gene_number*$Y2scale+$max_target_e_number*$Y2scale+20; +#my $Y3scale=$Y1scale; +for (my $i=0;$i<@target_e_desity-1 ;$i++) { + my $target_e_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $target_e_density_start_y=$y3_target_e_density-$target_e_desity[$i]*$Y2scale; + my $target_e_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $target_e_density_end_y=$y3_target_e_density-$target_e_desity[$i+1]*$Y2scale; + $svg->rect('x',$target_e_density_start_x,'y',$target_e_density_start_y,'width',$span*$Xscale,'height',$y3_target_e_density-$target_e_density_start_y,'stroke',"read",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); + +} +$svg->line('x1',$XOFFSET,'y1',$y3_target_e_density,'x2',$XOFFSET+$length*$Xscale,'y2',$y3_target_e_density,'stroke',"black",'stroke-width',"black"); +#column +$svg->line('x1',$XOFFSET,'y1',$y3_target_e_density,'x2',$XOFFSET,'y2',$y3_target_e_density-$max_te_gene_number*$Y2scale,'stroke',"black",'stroke-width',"black"); +$svg->text('x',$XOFFSET-15,'y',$y3_target_e_density,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',0);#max gene number +$svg->text('x',$XOFFSET-20,'y',$y3_target_e_density-$max_te_gene_number*$Y2scale+10,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_target_e_number);#max gene number +#=========================================exp gene indensity txt========================== +$svg->rect('x',$XOFFSET+$length*$Xscale+15,'y',$y3_target_e_density-$max_target_e_number*$Y2scale*0.7,'width',10,'height',5,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y3_target_e_density-$max_target_e_number*$Y2scale*0.7+5,'stroke',"red",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"Exp Genes"); +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y3_target_e_density-$max_target_e_number*$Y2scale*0.7+20,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"genes number \/ 50kb"); +#calculate the different +=cut +#======================================================================================== +my $Y3scale=0.04/$max_cluster_density*2500; +#my $y3_cluster_density=$y2_te_gene_density+$max_te_gene_number*5+$max_cluster_density*$Y3scale+10; +my $y3_cluster_density=$y1_gene_density+20; +my $y3_total=$y1_gene_density+10; +my @cluster_color=qw(fuchsia violet tomato); +for (my $s=0;$s<3 ;$s++) { + $y3_total=$y3_total+$max_cluster_density*$Y3scale+5; + $svg->line('x1',$XOFFSET,'y1',$y3_total,'x2',$XOFFSET+$length*$Xscale,'y2',$y3_total,'stroke',"black",'stroke-width',$attribute{csv}{'stroke-width'}); + $svg->line('x1',$XOFFSET,'y1',$y3_total,'x2',$XOFFSET,'y2',$y3_total-$max_cluster_density*$Y3scale,'stroke',"black",'stroke-width',$attribute{csv}{'stroke-width'}); + + $svg->text('x',$XOFFSET-15,'y',$y3_total,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',"0");#min gene number + + if ($max_cluster_density[$s]>$max_cluster_density) { + $svg->text('x',$XOFFSET-40,'y',$y3_total-$max_cluster_density*$Y3scale+10,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_cluster_density[$s]);#max gene number + } + else{ + $svg->text('x',$XOFFSET-40,'y',$y3_total-$max_cluster_density[$s]*$Y3scale+10,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_cluster_density[$s]);#max gene number + } + + for (my $i=0;$i<$#cluster_density ;$i++) { + my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + if ($cluster_density[$i][$s]>$max_cluster_density) { + $cluster_density[$i][$s]=$max_cluster_density; + } + if ($cluster_density[$i+1][$s]>$max_cluster_density) { + $cluster_density[$i+1][$s]=$max_cluster_density; + } + my $cluster_density_start_y=$y3_total-$cluster_density[$i][$s]*$Y3scale; + my $cluster_density_end_y=$y3_total-$cluster_density[$i+1][$s]*$Y3scale; + $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',$cluster_color[$s],'stroke-width',$attribute{csv}{'stroke-width'}); + } +} +for (my $s=0;$s<@sample ;$s++) { + $svg->line('x1',$XOFFSET+$length*$Xscale+10,'y1',$y3_cluster_density+($y3_total-$y3_cluster_density)*0.3+30*$s,'x2',$XOFFSET+$length*$Xscale+30,'y2',$y3_cluster_density+($y3_total-$y3_cluster_density)*0.3+30*$s,'stroke',$cluster_color[$s],'stroke-width',$attribute{csv}{'stroke-width'}); + $svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y3_cluster_density+($y3_total-$y3_cluster_density)*0.3+30*$s+5,'stroke',$cluster_color[$s],'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"Small RNAs ".$sample[$s]); +} + +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y3_cluster_density+($y3_total-$y3_cluster_density)*0.3+30*3-5,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"indensity of sRNA \/ $md kb"); +#===================================plot region target gene rpkm================ +=cut +my $y4_region_gene_rpkm_1=$y3_total+10; +my $y4_region_gene_rpkm=$y4_region_gene_rpkm_1; +my $Y4scale=0.1/$max_region_gene_rpkm*1000; +my @cluster_color_t=qw(blue slateblue steelblue); +for (my $s=0;$s<3 ;$s++) { + $y4_region_gene_rpkm+=$max_region_gene_rpkm*$Y4scale+5; + + $svg->line('x1',$XOFFSET,'y1',$y4_region_gene_rpkm,'x2',$XOFFSET+$length*$Xscale,'y2',$y4_region_gene_rpkm,'stroke',"black",'stroke-width',$attribute{csv}{'stroke-width'}); + + $svg->line('x1',$XOFFSET,'y1',$y4_region_gene_rpkm,'x2',$XOFFSET,'y2',$y4_region_gene_rpkm-$max_region_gene_rpkm*$Y4scale,'stroke',"black",'stroke-width',$attribute{csv}{'stroke-width'}); + + $svg->text('x',$XOFFSET-15,'y',$y4_region_gene_rpkm,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',"0");#min gene number + + if ($max_region_gene_rpkm[$s]>$max_region_gene_rpkm) { + $svg->text('x',$XOFFSET-40,'y',$y4_region_gene_rpkm-$max_region_gene_rpkm*$Y4scale+10,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_region_gene_rpkm[$s]);#max gene number + } + else{ + $svg->text('x',$XOFFSET-40,'y',$y4_region_gene_rpkm-$max_region_gene_rpkm[$s]*$Y4scale+10,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size2'},'font-family',$attribute{font}{'font-family'},'-cdata',$max_region_gene_rpkm[$s]);#max gene number + } + for (my $i=0;$i<$#region_target_rpkm ;$i++) { + my $region_target_rpkm_start_x=$XOFFSET+$i*$span*$Xscale; + my $region_target_rpkm_end_x=$XOFFSET+($i+1)*$span*$Xscale; + if ($region_target_rpkm[$i][$s]>$max_region_gene_rpkm) { + $region_target_rpkm[$i][$s]=$max_region_gene_rpkm; + } + if ($region_target_rpkm[$i+1][$s]>$max_region_gene_rpkm) { + $region_target_rpkm[$i+1][$s]=$max_region_gene_rpkm; + } + my $region_target_rpkm_start_y=$y4_region_gene_rpkm-$region_target_rpkm[$i][$s]*$Y4scale; + my $region_target_rpkm_end_y=$y4_region_gene_rpkm-$region_target_rpkm[$i+1][$s]*$Y4scale; + $svg->line('x1',$region_target_rpkm_start_x,'y1',$region_target_rpkm_start_y,'x2',$region_target_rpkm_end_x,'y2',$region_target_rpkm_end_y,'stroke',$cluster_color_t[$s],'stroke-width',$attribute{csv}{'stroke-width'}); + } + +} +for (my $s=0;$s<3 ;$s++) { + $svg->line('x1',$XOFFSET+$length*$Xscale+10,'y1',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y4_region_gene_rpkm_1)*0.3+30*$s,'x2',$XOFFSET+$length*$Xscale+30,'y2',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y4_region_gene_rpkm_1)*0.3+30*$s,'stroke',$cluster_color_t[$s],'stroke-width',$attribute{csv}{'stroke-width'}); + $svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y4_region_gene_rpkm_1)*0.3+30*$s+5,'stroke',$cluster_color_t[$s],'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"Target Genes ".$sample[$s]); +} +$svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y4_region_gene_rpkm_1)*0.3+30*3-5,'stroke',"black",'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',"indensity of genes \/ 50kb"); +=cut +#for (my $s=0;$s<3 ;$s++) { +# $svg->line('x1',$XOFFSET+$length*$Xscale+10,'y1',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y3_cluster_density)*0.3+30*$s,'x2',$XOFFSET+$length*$Xscale+30,'y2',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y3_cluster_density)*0.3+30*$s,'stroke',$cluster_color[$s],'stroke-width',$attribute{csv}{'stroke-width'}); +# $svg->text('x',$XOFFSET+$length*$Xscale+35,'y',$y4_region_gene_rpkm_1+($y4_region_gene_rpkm-$y3_cluster_density)*0.3+30*$s+5,'stroke',$cluster_color[$s],'stroke-width',$attribute{text}{'stroke-width'},'font-size',$attribute{font}{'font-size'},'font-family',$attribute{font}{'font-family'},'-cdata',$sample[$s]); +#} +#========================================================================================= + +#sub ExpG_number { +# +#} +#sub ExpCluster_number{ +# +#} + + +#======================================================================================== +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub log2 { + my $n = shift; + return log($n)/log(2); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-l centromere length +-chro +-mark \# +-g input file of Gene region annotation which format like GenePred +-span +-c cluster file input +-o svg output +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 collapseReads2Tags.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/collapseReads2Tags.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,170 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2014-3-20 +#Modified: +#Description: fastq file form reads cluster(the same sequence in the same cluster) +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i:s@","format=s","mark:s","qual:s","qv:i","o=s","h"); +if (!(defined $opts{o} and defined $opts{'format'}) || defined $opts{h}) { #necessary arguments +&usage; +} +my @filein=@{$opts{i}} if(defined $opts{i}); +my $name=defined $opts{'mark'} ? $opts{'mark'} : "seq"; +my $fileout=$opts{'o'}; +my $pq=defined $opts{'qv'} ? $opts{'qv'} : 33; +my %hash;##分块存放原始序列 + +my $format=$opts{'format'}; +if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { + die "Parameter -format is error!\n"; +} + +my ($qualT,$qualV); +if (defined $opts{'qual'} && ($format eq "fastq" || $format eq "fq")) { #quality filter + my @temp=split /:/,$opts{'qual'}; + $qualT=$temp[0]; + $qualV=$temp[1]; + + for (my $i=0;$i<@filein;$i++) { + open IN,"<$filein[$i]"; + while (my $aline=) { + my $seq=; + my $n=; + my $qv=; + my $tag=&qvcheck($qv,$qualT,$qualV); + next if(!$tag); + my $str=substr($seq,0,6); + $hash{$str}[$i].=$seq; + } + close IN; + } +} +elsif($format eq "fastq" || $format eq "fq"){ ### do not filter low quality reads + for (my $i=0;$i<@filein;$i++) { + open IN,"<$filein[$i]"; + while (my $aline=) { + my $seq=; + my $n=; + my $qv=; + my $str=substr($seq,0,6); + $hash{$str}[$i].=$seq; + } + close IN; + } + +} +elsif($format eq "fasta" || $format eq "fa"){ + for (my $i=0;$i<@filein;$i++) { + open IN,"<$filein[$i]"; + while (my $aline=) { + my $seq=; + my $str=substr($seq,0,6); + $hash{$str}[$i].=$seq; + } + close IN; + } +} + +open OUT,">$fileout"; #output file +my $count=0; +foreach my $key (keys %hash) { + my %cluster; + for (my $i=0;$i<@filein;$i++) { + next if(!(defined $hash{$key}[$i])); + my @tmp=split/\n/,$hash{$key}[$i]; + foreach (@tmp) { + $cluster{$_}[$i]++; + } + } + + foreach my $seq (keys %cluster) { + my $exp=""; my $ee=0; + for (my $i=0;$i<@filein;$i++) { + if (defined $cluster{$seq}[$i]) { + $exp.="_$cluster{$seq}[$i]"; + $ee+=$cluster{$seq}[$i]; + }else{ + $exp.="_0"; + } + } + $count+=$ee; + $exp=~s/^_//; + print OUT ">$name","_$count:$exp","_x$ee\n$seq\n"; + } +} +close OUT; + + +sub qvcheck{ + my ($str,$t,$v)=@_; + my $qv=0; + if($t eq "mean"){ + $qv=&getMeanQuality($str); + } + elsif($t eq "min"){ + $qv=&getMinQuality($str); + } + if ($qv<$v) { + return 0; + } + return 1; +} + +sub getMeanQuality(){ + chomp $_[0]; + my @bases = split(//,$_[0]); + my $sum = 0; + for(my $i = 0; $i <= $#bases; $i++){ + my $num = ord($bases[$i]) - $pq; + $sum += $num; + } + + return $sum/($#bases+1); + +} + +### +### This function gives back the Q-value of the worst base +sub getMinQuality(){ + chomp $_[0]; + my @bases = split(//,$_[0]); + my $worst = 1000; + for(my $i = 0; $i <= $#bases; $i++){ +# printf ("base: $bases[$i] --> %d\n",ord($bases[$i])); + my $num = ord($bases[$i]) - $pq; + if($num < $worst){ + $worst = $num; + } + } + return $worst; +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -format -mark -qual -qv -o +options: +-i input file#fastq file ##can be multiple -i file1 -i file2 ... +-mark string#quary name,default is "seq" +-o output file +-format string # fastq|fasta|fq|fa + +-qual #reads filter + eg:(min:value/mean:value) + This parameter just for solexa reads. + If the input files are solid and needs filter,please do filter first . + +-qv integer #Phred quality64/33,default 33 +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 conventional.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/conventional.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,156 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Chentt +#Email: chentt@big.ac.cn +#Date: 2014/04/09 +#Modified: +#Description: islands merged of merged samples +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h"); +if (!(defined $opts{i} and defined $opts{d} and defined $opts{N} and defined $opts{mark} and defined $opts{t} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +my $distance=$opts{'d'}; +my $tempout=$opts{'t'}; +my $mark=$opts{'mark'}; +my @sample=split/\#/,$mark; +$mark=join"\"\t\"",@sample; + +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file +print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n"; +open TMP,">$tempout"; +print TMP "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n"; +my %hash; +while (my $aline=) { + chomp $aline; + if($aline=~/^\#/){ + #print OUT "$aline\n"; + next; + } + my @tmp=split/\t/,$aline; + my $chr=shift @tmp; + #shift @tmp; + push @{$hash{$chr}},[@tmp]; +} + +close IN; + +foreach my $key (keys %hash) { + my @tag=sort{$a->[1] <=> $b->[1]} @{$hash{$key}}; + my @sample; + my $start=$tag[0][1]; + my $end=$tag[0][2]; + push @sample,[@{$tag[0]}]; + for (my $i=1;$i<@tag-1;$i++) { + if ($tag[$i][1]-$end<=$distance) { + if ($tag[$i][2]>$end) { + $end=$tag[$i][2]; + } + push @sample,[@{$tag[$i]}]; + } + else{ + my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); + my $cluster_exp=join"\t",@cluster_exp; + if ($max_length>30) { + print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; + $max_length="\>30"; + } + else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} + print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; + $start=$tag[$i][1]; + $end=$tag[$i][2]; + + @sample=(); + push @sample,[@{$tag[$i]}]; + } + } + if ($tag[$#tag][1]-$end<=$distance) { + if ($tag[$#tag][2]>$end) { + $end=$tag[$#tag][2]; + } + push @sample,[@{$tag[$#tag]}]; + my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); + my $cluster_exp=join"\t",@cluster_exp; + if ($max_length>30) { + $max_length="\>30"; + print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; + } + else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} + print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; + } + else{ + my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample); + my $cluster_exp=join"\t",@cluster_exp; + if ($max_length>30) { + $max_length="\>30"; + print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n"; + } + else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";} + print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n"; + + } +} +close OUT; +close TMP; +sub Max_length{ + my @exp=@{$_[0]}; + my %sample_length; + my $total_exp; + my @each; + my @tag; + for (my $i=0;$i<=$#exp ;$i++) { + my $length=$exp[$i][2]-$exp[$i][1]+1; + #if ($length>30) { + # $length=40; + #} + my $exp=0; + foreach (1..$opts{'N'}) { + $exp+=$exp[$i][$_+2]; + $each[$_-1]+=$exp[$i][$_+2]; + } + $sample_length{$length}+=$exp; + $total_exp+=$exp; + push @tag,($exp[$i][1].",".$exp[$i][2].",".$exp[$i][0].",".$exp); + } + my $max=0; + my $max_key; + foreach my $key (sort keys %sample_length) { + my $p=$sample_length{$key}/$total_exp; + if ($p>$max) { + $max=$p; + $max_key=$key; + } + $sample_length{$key}=sprintf("%.2f",$p); + } + my $tag_n=@tag; + my $tag=join";",@tag; + $tag=$tag_n."\t".$tag; + return($max_key,$sample_length{$max_key},$tag,@each); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -d -N -t -mark +options: +-i input file +-d distance of two islands +-mark sample name; +-o output file +-N sample number +-t temp output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 convert_bowtie_to_blast.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convert_bowtie_to_blast.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,126 @@ +#!/usr/bin/perl + + +use warnings; +use strict; +use Getopt::Std; + +######################################### USAGE ################################ + +my $usage= +"$0 file_bowtie_result file_solexa_seq file_chromosome + +This is a converter which changes Bowtie output into Blast format. +The input includes three files: a Bowtie result file (default Bowtie +output file), a fasta file consisting of small Reads and a chromosome +fasta file. It outputs the alignments in blast_parsed format. + +file_bowtie_result likes: + +AtFlower100010_x2 + MIR319c 508 AAGGAGATTCTTTCAGTCCAG IIIIIIIIIIIIIIIIIIIII 0 +AtFlower1000188_x1 + MIR2933a 421 TCGGAGAGGAAATTCGTCGGCG IIIIIIIIIIIIIIIIIIIIII 0 + +file_solexa_seq likes: + +>AtFlower100010_x2 +AAGGAGATTCTTTCAGTCCAG + +file_chromosome contains chromosome seq in fasta format + +"; + + +####################################### INPUT FILES ############################ + +my $file_bowtie_result=shift or die $usage; +my $file_short_seq=shift or die $usage; +my $file_chromosome_seq=shift or die $usage; + + +##################################### GLOBAL VARIBALES ######################### + +my %short_seq_length=(); +my %chromosome_length=(); + + +######################################### MAIN ################################# + +#get the short sequence id and its length +sequence_length($file_short_seq,\%short_seq_length); + +#get the chromosome sequence id and its length +sequence_length($file_chromosome_seq,\%chromosome_length); + +#convert bowtie result format to blast format; +change_format($file_bowtie_result); + +exit; + + +##################################### SUBROUTINES ############################## + +sub sequence_length{ + my ($file,$hash) = @_; + my ($id, $desc, $sequence, $seq_length) = (); + + open (FASTA, "<$file") or die "can not open $$file\n"; + while () + { + chomp; + if (/^>(\S+)(.*)/) + { + $id = $1; + $desc = $2; + $sequence = ""; + while (){ + chomp; + if (/^>(\S+)(.*)/){ + $$hash{$id} = length $sequence; + $id = $1; + $desc = $2; + $sequence = ""; + next; + } + $sequence .= $_; + } + } + } + $seq_length=length($sequence); + $$hash{$id} = $seq_length; + close FASTA; +} + + + + + +sub change_format{ + #Change Bowtie format into blast format + my $file=shift @_; + open(FILE,"<$file")||die"can not open the bowtie result file:$!\n"; + #open(BLASTOUT,">blastout")||die"can not create the blastout file:$!\n"; + + while(){ + chomp; + my @tmp=split("\t",$_); + #Clean the reads ID + my @tmp1=split(" ",$tmp[0]); + print "$tmp1[0]"."\t"."$short_seq_length{$tmp1[0]}"."\t"."1".'..'."$short_seq_length{$tmp1[0]}"."\t"."$tmp[2]"."\t"."$chromosome_length{$tmp[2]}"."\t"; + if($tmp[1] eq "+"){ + my $seq_end=$tmp[3] + $short_seq_length{$tmp1[0]}; + my $seq_bg=$tmp[3] + 1; + print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Plus"."\n"; + } + if($tmp[1] eq "-"){ + my $seq_end=$chromosome_length{$tmp[2]} - $tmp[3]; + my $seq_bg=$seq_end - $short_seq_length{$tmp1[0]} + 1; + print "$seq_bg".'..'."$seq_end"."\t"."1e-04"."\t"."1.00"."\t"."42.1"."\t"."Plus / Minus"."\n"; + } + } + +# close BLASTOUT; + +} + + + diff -r 2ed1e1728299 -r 9dcffd531c76 count_ref_length.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count_ref_length.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,58 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2009-05-06 +#Modified: +#Description: 删除matched reads +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; + +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file + +my ($name,$seq); +while (my $aline=) { + chomp $aline; + if ($aline=~/^>(\S+)/) { + $name=$1; + while (my $new=) { + chomp $new; + if ($new=~/^>(\S+)/) { + print OUT $name,"\t",length($seq),"\n"; + $seq =""; + $name=$1; + next; + } + else{$seq .=$new;} + } + } + print OUT $name,"\t",length($seq),"\n"; +} + +close IN; +close OUT; +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 count_rfam_express.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count_rfam_express.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,1800 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2013/7/19 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","o=s","tag:s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; + +my $marks=defined $opts{'tag'} ? $opts{'tag'} : ""; + +if(!(defined $opts{'tag'})){ + my $line=`head -1 $filein`; + my @tmp=split/\t/,$line; + $tmp[0]=~/:([\d|_]+)_x(\d+)$/; + my @ss=split/_/,$1; + for (my $i=1;$i<=@ss;$i++) { + $marks .="Smp$i;"; + } +} + +my @marks=split/\;/,$marks; + +my %rfam_key; +while(){ + chomp; + if(/^(\S+)\s+(\S+)$/){ + $rfam_key{$1}=$2; + } +} + + +my %reads; +my %tags; +open IN,"<$filein"; +while (my $aline=) { + chomp $aline; + my @tmp=split/\t/,$aline; + $tmp[0]=~/:([\d|_]+)_x(\d+)$/; + + my @exp=split/_/,$1; + my @tag=split/\;/,$tmp[2]; + + if (defined $rfam_key{$tag[0]}) { + for (my $i=0;$i<@exp;$i++) { + $reads{$rfam_key{$tag[0]}}[$i]+=$exp[$i]; + $tags{$rfam_key{$tag[0]}}[$i]++ if($exp[$i]!=0); + } + }else{ + for (my $i=0;$i<@exp;$i++) { + $reads{other}[$i]+=$exp[$i]; + $tags{other}[$i]++ if($exp[$i]!=0); + } + } + +} +close IN; + +$"="\t"; ##### @array print in \t +open OUT,">$fileout"; +print OUT "####################################\n# small RNA expressed reads number #\n####################################\n"; +print OUT "#RNAname\t@marks\n"; +foreach my $key (keys %reads) { + print OUT $key; + for (my $i=0;$i<@{$reads{$key}} ;$i++) { + print OUT "\t",$reads{$key}[$i]; + } + print OUT "\n"; +} + +print OUT "\n\n####################################\n# small RNA expressed tags number #\n####################################\n"; +print OUT "#RNAname\t@marks\n"; + +foreach my $key (keys %tags) { + print OUT $key; + for (my $i=0;$i<@{$reads{$key}} ;$i++) { + if(defined $tags{$key}[$i]){print OUT "\t",$tags{$key}[$i];} + else{print OUT "\t0";} + } + print OUT "\n"; +} + +close OUT; +$"=" "; ##### @array print in \t + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -tag -o +options: +-i input file# rfam bowtie bwt. format mapping result +-tag [string] sample marks# eg. sampleA;sampleB;sampleC +-o output file + +-h help +USAGE +exit(1); +} + +__DATA__ +RF00635 lncRNA +RF01868 lncRNA +RF01869 lncRNA +RF01870 lncRNA +RF01871 lncRNA +RF01872 lncRNA +RF01873 lncRNA +RF01874 lncRNA +RF01875 lncRNA +RF01876 lncRNA +RF01877 lncRNA +RF01878 lncRNA +RF01879 lncRNA +RF01880 lncRNA +RF01881 lncRNA +RF01882 lncRNA +RF01883 lncRNA +RF01884 lncRNA +RF01885 lncRNA +RF01886 lncRNA +RF01887 lncRNA +RF01888 lncRNA +RF01889 lncRNA +RF01890 lncRNA +RF01891 lncRNA +RF01892 lncRNA +RF01893 lncRNA +RF01894 lncRNA +RF01904 lncRNA +RF01905 lncRNA +RF01906 lncRNA +RF01907 lncRNA +RF01908 lncRNA +RF01909 lncRNA +RF01928 lncRNA +RF01929 lncRNA +RF01930 lncRNA +RF01931 lncRNA +RF01932 lncRNA +RF01933 lncRNA +RF01934 lncRNA +RF01935 lncRNA +RF01946 lncRNA +RF01947 lncRNA +RF01948 lncRNA +RF01950 lncRNA +RF01951 lncRNA +RF01952 lncRNA +RF01953 lncRNA +RF01954 lncRNA +RF01955 lncRNA +RF01956 lncRNA +RF01957 lncRNA +RF01958 lncRNA +RF01961 lncRNA +RF01962 lncRNA +RF01963 lncRNA +RF01964 lncRNA +RF01965 lncRNA +RF01966 lncRNA +RF01967 lncRNA +RF01968 lncRNA +RF01969 lncRNA +RF01970 lncRNA +RF01971 lncRNA +RF01972 lncRNA +RF01973 lncRNA +RF01974 lncRNA +RF01975 lncRNA +RF01976 lncRNA +RF01977 lncRNA +RF01978 lncRNA +RF01979 lncRNA +RF01980 lncRNA +RF01981 lncRNA +RF01983 lncRNA +RF01984 lncRNA +RF01985 lncRNA +RF01986 lncRNA +RF01987 lncRNA +RF01992 lncRNA +RF02038 lncRNA +RF02039 lncRNA +RF02040 lncRNA +RF02041 lncRNA +RF02042 lncRNA +RF02043 lncRNA +RF02044 lncRNA +RF02045 lncRNA +RF02046 lncRNA +RF02047 lncRNA +RF02085 lncRNA +RF02086 lncRNA +RF02087 lncRNA +RF02089 lncRNA +RF02090 lncRNA +RF02091 lncRNA +RF02098 lncRNA +RF02101 lncRNA +RF02102 lncRNA +RF02103 lncRNA +RF02104 lncRNA +RF02105 lncRNA +RF02106 lncRNA +RF02107 lncRNA +RF02108 lncRNA +RF02109 lncRNA +RF02110 lncRNA +RF02112 lncRNA +RF02113 lncRNA +RF02114 lncRNA +RF02115 lncRNA +RF02116 lncRNA +RF02117 lncRNA +RF02118 lncRNA +RF02119 lncRNA +RF02120 lncRNA +RF02121 lncRNA +RF02122 lncRNA +RF02123 lncRNA +RF02124 lncRNA +RF02125 lncRNA +RF02126 lncRNA +RF02127 lncRNA +RF02128 lncRNA +RF02129 lncRNA +RF02130 lncRNA +RF02131 lncRNA +RF02132 lncRNA +RF02133 lncRNA +RF02134 lncRNA +RF02135 lncRNA +RF02136 lncRNA +RF02137 lncRNA +RF02138 lncRNA +RF02139 lncRNA +RF02140 lncRNA +RF02141 lncRNA +RF02142 lncRNA +RF02143 lncRNA +RF02145 lncRNA +RF02146 lncRNA +RF02147 lncRNA +RF02148 lncRNA +RF02149 lncRNA +RF02150 lncRNA +RF02152 lncRNA +RF02153 lncRNA +RF02154 lncRNA +RF02155 lncRNA +RF02156 lncRNA +RF02157 lncRNA +RF02158 lncRNA +RF02159 lncRNA +RF02160 lncRNA +RF02161 lncRNA +RF02164 lncRNA +RF02165 lncRNA +RF02166 lncRNA +RF02167 lncRNA +RF02168 lncRNA +RF02169 lncRNA +RF02170 lncRNA +RF02171 lncRNA +RF02172 lncRNA +RF02173 lncRNA +RF02174 lncRNA +RF02175 lncRNA +RF02176 lncRNA +RF02177 lncRNA +RF02178 lncRNA +RF02179 lncRNA +RF02180 lncRNA +RF02181 lncRNA +RF02182 lncRNA +RF02183 lncRNA +RF02184 lncRNA +RF02185 lncRNA +RF02186 lncRNA +RF02187 lncRNA +RF02188 lncRNA +RF02189 lncRNA +RF02190 lncRNA +RF02191 lncRNA +RF02192 lncRNA +RF02193 lncRNA +RF02195 lncRNA +RF02196 lncRNA +RF02197 lncRNA +RF02198 lncRNA +RF02199 lncRNA +RF02200 lncRNA +RF02201 lncRNA +RF02202 lncRNA +RF02203 lncRNA +RF02204 lncRNA +RF02205 lncRNA +RF02206 lncRNA +RF02207 lncRNA +RF02208 lncRNA +RF02209 lncRNA +RF02210 lncRNA +RF02211 lncRNA +RF02212 lncRNA +RF02213 lncRNA +RF02215 lncRNA +RF02216 lncRNA +RF02217 lncRNA +RF02218 lncRNA +RF02219 lncRNA +RF02220 lncRNA +RF02246 lncRNA +RF02247 lncRNA +RF02248 lncRNA +RF02249 lncRNA +RF02250 lncRNA +RF02251 lncRNA +RF02252 lncRNA +RF02255 lncRNA +RF02256 lncRNA +RF02257 lncRNA +RF02258 lncRNA +RF02259 lncRNA +RF02267 lncRNA +RF02272 lncRNA +RF00027 miRNA +RF00047 miRNA +RF00051 miRNA +RF00052 miRNA +RF00053 miRNA +RF00073 miRNA +RF00074 miRNA +RF00075 miRNA +RF00076 miRNA +RF00103 miRNA +RF00104 miRNA +RF00129 miRNA +RF00130 miRNA +RF00131 miRNA +RF00143 miRNA +RF00144 miRNA +RF00178 miRNA +RF00237 miRNA +RF00239 miRNA +RF00241 miRNA +RF00244 miRNA +RF00245 miRNA +RF00246 miRNA +RF00247 miRNA +RF00248 miRNA +RF00249 miRNA +RF00250 miRNA +RF00251 miRNA +RF00253 miRNA +RF00254 miRNA +RF00255 miRNA +RF00256 miRNA +RF00257 miRNA +RF00258 miRNA +RF00363 miRNA +RF00364 miRNA +RF00365 miRNA +RF00366 miRNA +RF00367 miRNA +RF00445 miRNA +RF00446 miRNA +RF00451 miRNA +RF00452 miRNA +RF00455 miRNA +RF00456 miRNA +RF00464 miRNA +RF00486 miRNA +RF00637 miRNA +RF00638 miRNA +RF00639 miRNA +RF00640 miRNA +RF00641 miRNA +RF00642 miRNA +RF00643 miRNA +RF00644 miRNA +RF00645 miRNA +RF00646 miRNA +RF00647 miRNA +RF00648 miRNA +RF00649 miRNA +RF00650 miRNA +RF00651 miRNA +RF00652 miRNA +RF00653 miRNA +RF00654 miRNA +RF00655 miRNA +RF00656 miRNA +RF00657 miRNA +RF00658 miRNA +RF00659 miRNA +RF00660 miRNA +RF00661 miRNA +RF00662 miRNA +RF00663 miRNA +RF00664 miRNA +RF00665 miRNA +RF00666 miRNA +RF00667 miRNA +RF00668 miRNA +RF00669 miRNA +RF00670 miRNA +RF00671 miRNA +RF00672 miRNA +RF00673 miRNA +RF00674 miRNA +RF00675 miRNA +RF00676 miRNA +RF00677 miRNA +RF00678 miRNA +RF00679 miRNA +RF00680 miRNA +RF00681 miRNA +RF00682 miRNA +RF00683 miRNA +RF00684 miRNA +RF00685 miRNA +RF00686 miRNA +RF00687 miRNA +RF00688 miRNA +RF00689 miRNA +RF00690 miRNA +RF00691 miRNA +RF00692 miRNA +RF00693 miRNA +RF00694 miRNA +RF00695 miRNA +RF00696 miRNA +RF00697 miRNA +RF00698 miRNA +RF00699 miRNA +RF00700 miRNA +RF00701 miRNA +RF00702 miRNA +RF00703 miRNA +RF00704 miRNA +RF00705 miRNA +RF00706 miRNA +RF00707 miRNA +RF00708 miRNA +RF00709 miRNA +RF00710 miRNA +RF00711 miRNA +RF00712 miRNA +RF00713 miRNA +RF00714 miRNA +RF00715 miRNA +RF00716 miRNA +RF00717 miRNA +RF00718 miRNA +RF00719 miRNA +RF00720 miRNA +RF00721 miRNA +RF00722 miRNA +RF00723 miRNA +RF00724 miRNA +RF00725 miRNA +RF00726 miRNA +RF00727 miRNA +RF00728 miRNA +RF00729 miRNA +RF00730 miRNA +RF00731 miRNA +RF00732 miRNA +RF00733 miRNA +RF00734 miRNA +RF00735 miRNA +RF00736 miRNA +RF00737 miRNA +RF00739 miRNA +RF00740 miRNA +RF00741 miRNA +RF00742 miRNA +RF00743 miRNA +RF00744 miRNA +RF00745 miRNA +RF00746 miRNA +RF00747 miRNA +RF00748 miRNA +RF00749 miRNA +RF00750 miRNA +RF00751 miRNA +RF00752 miRNA +RF00753 miRNA +RF00754 miRNA +RF00755 miRNA +RF00756 miRNA +RF00757 miRNA +RF00758 miRNA +RF00760 miRNA +RF00761 miRNA +RF00762 miRNA +RF00763 miRNA +RF00764 miRNA +RF00765 miRNA +RF00766 miRNA +RF00767 miRNA +RF00768 miRNA +RF00769 miRNA +RF00770 miRNA +RF00771 miRNA +RF00772 miRNA +RF00773 miRNA +RF00774 miRNA +RF00775 miRNA +RF00776 miRNA +RF00777 miRNA +RF00778 miRNA +RF00779 miRNA +RF00780 miRNA +RF00781 miRNA +RF00782 miRNA +RF00783 miRNA +RF00784 miRNA +RF00785 miRNA +RF00786 miRNA +RF00787 miRNA +RF00788 miRNA +RF00789 miRNA +RF00790 miRNA +RF00791 miRNA +RF00792 miRNA +RF00793 miRNA +RF00794 miRNA +RF00795 miRNA +RF00796 miRNA +RF00797 miRNA +RF00798 miRNA +RF00799 miRNA +RF00800 miRNA +RF00801 miRNA +RF00802 miRNA +RF00803 miRNA +RF00804 miRNA +RF00805 miRNA +RF00806 miRNA +RF00807 miRNA +RF00808 miRNA +RF00809 miRNA +RF00810 miRNA +RF00811 miRNA +RF00812 miRNA +RF00813 miRNA +RF00814 miRNA +RF00815 miRNA +RF00816 miRNA +RF00817 miRNA +RF00818 miRNA +RF00819 miRNA +RF00820 miRNA +RF00821 miRNA +RF00822 miRNA +RF00823 miRNA +RF00824 miRNA +RF00825 miRNA +RF00826 miRNA +RF00827 miRNA +RF00828 miRNA +RF00829 miRNA +RF00830 miRNA +RF00831 miRNA +RF00832 miRNA +RF00833 miRNA +RF00834 miRNA +RF00835 miRNA +RF00836 miRNA +RF00837 miRNA +RF00838 miRNA +RF00839 miRNA +RF00840 miRNA +RF00841 miRNA +RF00842 miRNA +RF00843 miRNA +RF00844 miRNA +RF00845 miRNA +RF00846 miRNA +RF00847 miRNA +RF00848 miRNA +RF00849 miRNA +RF00850 miRNA +RF00851 miRNA +RF00852 miRNA +RF00853 miRNA +RF00854 miRNA +RF00855 miRNA +RF00856 miRNA +RF00857 miRNA +RF00858 miRNA +RF00859 miRNA +RF00861 miRNA +RF00862 miRNA +RF00863 miRNA +RF00864 miRNA +RF00865 miRNA +RF00866 miRNA +RF00867 miRNA +RF00868 miRNA +RF00869 miRNA +RF00870 miRNA +RF00871 miRNA +RF00872 miRNA +RF00873 miRNA +RF00874 miRNA +RF00875 miRNA +RF00876 miRNA +RF00877 miRNA +RF00878 miRNA +RF00879 miRNA +RF00882 miRNA +RF00883 miRNA +RF00884 miRNA +RF00885 miRNA +RF00886 miRNA +RF00887 miRNA +RF00888 miRNA +RF00890 miRNA +RF00891 miRNA +RF00892 miRNA +RF00893 miRNA +RF00894 miRNA +RF00895 miRNA +RF00896 miRNA +RF00897 miRNA +RF00898 miRNA +RF00899 miRNA +RF00900 miRNA +RF00901 miRNA +RF00902 miRNA +RF00903 miRNA +RF00904 miRNA +RF00905 miRNA +RF00906 miRNA +RF00907 miRNA +RF00908 miRNA +RF00909 miRNA +RF00910 miRNA +RF00911 miRNA +RF00912 miRNA +RF00914 miRNA +RF00915 miRNA +RF00917 miRNA +RF00918 miRNA +RF00919 miRNA +RF00920 miRNA +RF00921 miRNA +RF00922 miRNA +RF00925 miRNA +RF00926 miRNA +RF00927 miRNA +RF00928 miRNA +RF00929 miRNA +RF00931 miRNA +RF00932 miRNA +RF00933 miRNA +RF00934 miRNA +RF00935 miRNA +RF00936 miRNA +RF00937 miRNA +RF00939 miRNA +RF00940 miRNA +RF00941 miRNA +RF00942 miRNA +RF00943 miRNA +RF00945 miRNA +RF00946 miRNA +RF00947 miRNA +RF00948 miRNA +RF00949 miRNA +RF00950 miRNA +RF00951 miRNA +RF00952 miRNA +RF00953 miRNA +RF00954 miRNA +RF00955 miRNA +RF00956 miRNA +RF00957 miRNA +RF00958 miRNA +RF00959 miRNA +RF00960 miRNA +RF00961 miRNA +RF00962 miRNA +RF00963 miRNA +RF00964 miRNA +RF00965 miRNA +RF00966 miRNA +RF00967 miRNA +RF00968 miRNA +RF00969 miRNA +RF00970 miRNA +RF00971 miRNA +RF00972 miRNA +RF00973 miRNA +RF00974 miRNA +RF00975 miRNA +RF00976 miRNA +RF00977 miRNA +RF00978 miRNA +RF00979 miRNA +RF00980 miRNA +RF00981 miRNA +RF00983 miRNA +RF00984 miRNA +RF00985 miRNA +RF00986 miRNA +RF00987 miRNA +RF00988 miRNA +RF00989 miRNA +RF00990 miRNA +RF00991 miRNA +RF00992 miRNA +RF00993 miRNA +RF00994 miRNA +RF00995 miRNA +RF00996 miRNA +RF00997 miRNA +RF00998 miRNA +RF00999 miRNA +RF01000 miRNA +RF01001 miRNA +RF01002 miRNA +RF01003 miRNA +RF01004 miRNA +RF01005 miRNA +RF01006 miRNA +RF01007 miRNA +RF01008 miRNA +RF01009 miRNA +RF01010 miRNA +RF01011 miRNA +RF01012 miRNA +RF01013 miRNA +RF01014 miRNA +RF01015 miRNA +RF01016 miRNA +RF01017 miRNA +RF01018 miRNA +RF01019 miRNA +RF01020 miRNA +RF01021 miRNA +RF01022 miRNA +RF01023 miRNA +RF01024 miRNA +RF01025 miRNA +RF01026 miRNA +RF01027 miRNA +RF01028 miRNA +RF01029 miRNA +RF01030 miRNA +RF01031 miRNA +RF01032 miRNA +RF01033 miRNA +RF01034 miRNA +RF01035 miRNA +RF01036 miRNA +RF01037 miRNA +RF01038 miRNA +RF01039 miRNA +RF01040 miRNA +RF01041 miRNA +RF01042 miRNA +RF01043 miRNA +RF01044 miRNA +RF01045 miRNA +RF01059 miRNA +RF01061 miRNA +RF01063 miRNA +RF01064 miRNA +RF01117 miRNA +RF01314 miRNA +RF01413 miRNA +RF01895 miRNA +RF01896 miRNA +RF01897 miRNA +RF01898 miRNA +RF01899 miRNA +RF01900 miRNA +RF01901 miRNA +RF01902 miRNA +RF01903 miRNA +RF01910 miRNA +RF01911 miRNA +RF01912 miRNA +RF01913 miRNA +RF01914 miRNA +RF01915 miRNA +RF01916 miRNA +RF01917 miRNA +RF01918 miRNA +RF01919 miRNA +RF01920 miRNA +RF01921 miRNA +RF01922 miRNA +RF01923 miRNA +RF01924 miRNA +RF01925 miRNA +RF01926 miRNA +RF01927 miRNA +RF01936 miRNA +RF01937 miRNA +RF01938 miRNA +RF01939 miRNA +RF01940 miRNA +RF01941 miRNA +RF01942 miRNA +RF01943 miRNA +RF01944 miRNA +RF01945 miRNA +RF01996 miRNA +RF01997 miRNA +RF02000 miRNA +RF02002 miRNA +RF02006 miRNA +RF02007 miRNA +RF02008 miRNA +RF02009 miRNA +RF02010 miRNA +RF02011 miRNA +RF02013 miRNA +RF02014 miRNA +RF02015 miRNA +RF02016 miRNA +RF02017 miRNA +RF02018 miRNA +RF02019 miRNA +RF02020 miRNA +RF02021 miRNA +RF02022 miRNA +RF02023 miRNA +RF02024 miRNA +RF02025 miRNA +RF02026 miRNA +RF02027 miRNA +RF02028 miRNA +RF02061 miRNA +RF02092 miRNA +RF02093 miRNA +RF02094 miRNA +RF02095 miRNA +RF02096 miRNA +RF02097 miRNA +RF02214 miRNA +RF02244 miRNA +RF02245 miRNA +RF02254 miRNA +RF00001 rRNA +RF00002 rRNA +RF01118 rRNA +RF01960 rRNA +RF00177 rRNA +RF01959 rRNA +RF00003 snRNA +RF00004 snRNA +RF00007 snRNA +RF00012 snRNA +RF00015 snRNA +RF00016 snRNA +RF00020 snRNA +RF00026 snRNA +RF00045 snRNA +RF00046 snRNA +RF00049 snRNA +RF00054 snRNA +RF00055 snRNA +RF00056 snRNA +RF00065 snRNA +RF00066 snRNA +RF00067 snRNA +RF00068 snRNA +RF00069 snRNA +RF00070 snRNA +RF00071 snRNA +RF00072 snRNA +RF00085 snRNA +RF00086 snRNA +RF00087 snRNA +RF00088 snRNA +RF00089 snRNA +RF00090 snRNA +RF00091 snRNA +RF00092 snRNA +RF00093 snRNA +RF00095 snRNA +RF00096 snRNA +RF00097 snRNA +RF00099 snRNA +RF00105 snRNA +RF00108 snRNA +RF00132 snRNA +RF00133 snRNA +RF00134 snRNA +RF00135 snRNA +RF00136 snRNA +RF00137 snRNA +RF00138 snRNA +RF00139 snRNA +RF00142 snRNA +RF00145 snRNA +RF00147 snRNA +RF00149 snRNA +RF00150 snRNA +RF00151 snRNA +RF00152 snRNA +RF00153 snRNA +RF00154 snRNA +RF00155 snRNA +RF00156 snRNA +RF00157 snRNA +RF00158 snRNA +RF00159 snRNA +RF00160 snRNA +RF00181 snRNA +RF00186 snRNA +RF00187 snRNA +RF00188 snRNA +RF00189 snRNA +RF00190 snRNA +RF00191 snRNA +RF00200 snRNA +RF00201 snRNA +RF00202 snRNA +RF00203 snRNA +RF00204 snRNA +RF00205 snRNA +RF00206 snRNA +RF00208 snRNA +RF00211 snRNA +RF00212 snRNA +RF00213 snRNA +RF00217 snRNA +RF00218 snRNA +RF00221 snRNA +RF00231 snRNA +RF00263 snRNA +RF00264 snRNA +RF00265 snRNA +RF00266 snRNA +RF00267 snRNA +RF00268 snRNA +RF00270 snRNA +RF00271 snRNA +RF00272 snRNA +RF00273 snRNA +RF00274 snRNA +RF00275 snRNA +RF00276 snRNA +RF00277 snRNA +RF00278 snRNA +RF00279 snRNA +RF00280 snRNA +RF00281 snRNA +RF00282 snRNA +RF00283 snRNA +RF00284 snRNA +RF00285 snRNA +RF00286 snRNA +RF00287 snRNA +RF00288 snRNA +RF00289 snRNA +RF00291 snRNA +RF00292 snRNA +RF00293 snRNA +RF00294 snRNA +RF00295 snRNA +RF00296 snRNA +RF00300 snRNA +RF00301 snRNA +RF00302 snRNA +RF00303 snRNA +RF00304 snRNA +RF00305 snRNA +RF00306 snRNA +RF00307 snRNA +RF00309 snRNA +RF00310 snRNA +RF00311 snRNA +RF00312 snRNA +RF00313 snRNA +RF00314 snRNA +RF00315 snRNA +RF00316 snRNA +RF00317 snRNA +RF00318 snRNA +RF00319 snRNA +RF00320 snRNA +RF00321 snRNA +RF00322 snRNA +RF00323 snRNA +RF00324 snRNA +RF00325 snRNA +RF00326 snRNA +RF00327 snRNA +RF00328 snRNA +RF00329 snRNA +RF00330 snRNA +RF00331 snRNA +RF00332 snRNA +RF00333 snRNA +RF00334 snRNA +RF00335 snRNA +RF00336 snRNA +RF00337 snRNA +RF00338 snRNA +RF00339 snRNA +RF00340 snRNA +RF00341 snRNA +RF00342 snRNA +RF00343 snRNA +RF00344 snRNA +RF00345 snRNA +RF00348 snRNA +RF00349 snRNA +RF00350 snRNA +RF00351 snRNA +RF00352 snRNA +RF00353 snRNA +RF00355 snRNA +RF00356 snRNA +RF00357 snRNA +RF00358 snRNA +RF00359 snRNA +RF00360 snRNA +RF00361 snRNA +RF00377 snRNA +RF00392 snRNA +RF00393 snRNA +RF00394 snRNA +RF00396 snRNA +RF00397 snRNA +RF00398 snRNA +RF00399 snRNA +RF00400 snRNA +RF00401 snRNA +RF00402 snRNA +RF00403 snRNA +RF00404 snRNA +RF00405 snRNA +RF00406 snRNA +RF00407 snRNA +RF00408 snRNA +RF00409 snRNA +RF00410 snRNA +RF00411 snRNA +RF00412 snRNA +RF00413 snRNA +RF00414 snRNA +RF00415 snRNA +RF00416 snRNA +RF00417 snRNA +RF00418 snRNA +RF00419 snRNA +RF00420 snRNA +RF00421 snRNA +RF00422 snRNA +RF00423 snRNA +RF00424 snRNA +RF00425 snRNA +RF00426 snRNA +RF00427 snRNA +RF00428 snRNA +RF00429 snRNA +RF00430 snRNA +RF00431 snRNA +RF00432 snRNA +RF00438 snRNA +RF00439 snRNA +RF00440 snRNA +RF00441 snRNA +RF00443 snRNA +RF00471 snRNA +RF00472 snRNA +RF00473 snRNA +RF00474 snRNA +RF00475 snRNA +RF00476 snRNA +RF00477 snRNA +RF00478 snRNA +RF00479 snRNA +RF00482 snRNA +RF00488 snRNA +RF00492 snRNA +RF00493 snRNA +RF00494 snRNA +RF00509 snRNA +RF00526 snRNA +RF00527 snRNA +RF00528 snRNA +RF00529 snRNA +RF00530 snRNA +RF00531 snRNA +RF00532 snRNA +RF00533 snRNA +RF00535 snRNA +RF00536 snRNA +RF00537 snRNA +RF00538 snRNA +RF00539 snRNA +RF00540 snRNA +RF00541 snRNA +RF00542 snRNA +RF00543 snRNA +RF00544 snRNA +RF00545 snRNA +RF00546 snRNA +RF00548 snRNA +RF00553 snRNA +RF00554 snRNA +RF00560 snRNA +RF00561 snRNA +RF00562 snRNA +RF00563 snRNA +RF00564 snRNA +RF00565 snRNA +RF00566 snRNA +RF00567 snRNA +RF00568 snRNA +RF00569 snRNA +RF00570 snRNA +RF00571 snRNA +RF00572 snRNA +RF00573 snRNA +RF00574 snRNA +RF00575 snRNA +RF00576 snRNA +RF00577 snRNA +RF00578 snRNA +RF00579 snRNA +RF00580 snRNA +RF00581 snRNA +RF00582 snRNA +RF00584 snRNA +RF00586 snRNA +RF00588 snRNA +RF00591 snRNA +RF00592 snRNA +RF00593 snRNA +RF00594 snRNA +RF00598 snRNA +RF00599 snRNA +RF00600 snRNA +RF00601 snRNA +RF00602 snRNA +RF00603 snRNA +RF00604 snRNA +RF00606 snRNA +RF00607 snRNA +RF00608 snRNA +RF00609 snRNA +RF00610 snRNA +RF00611 snRNA +RF00612 snRNA +RF00613 snRNA +RF00614 snRNA +RF00618 snRNA +RF00619 snRNA +RF01119 snRNA +RF01120 snRNA +RF01121 snRNA +RF01122 snRNA +RF01123 snRNA +RF01124 snRNA +RF01125 snRNA +RF01126 snRNA +RF01127 snRNA +RF01128 snRNA +RF01129 snRNA +RF01130 snRNA +RF01131 snRNA +RF01132 snRNA +RF01133 snRNA +RF01134 snRNA +RF01135 snRNA +RF01136 snRNA +RF01137 snRNA +RF01138 snRNA +RF01139 snRNA +RF01140 snRNA +RF01141 snRNA +RF01142 snRNA +RF01143 snRNA +RF01144 snRNA +RF01145 snRNA +RF01146 snRNA +RF01147 snRNA +RF01148 snRNA +RF01149 snRNA +RF01150 snRNA +RF01151 snRNA +RF01152 snRNA +RF01153 snRNA +RF01155 snRNA +RF01156 snRNA +RF01157 snRNA +RF01158 snRNA +RF01159 snRNA +RF01160 snRNA +RF01161 snRNA +RF01162 snRNA +RF01163 snRNA +RF01164 snRNA +RF01165 snRNA +RF01166 snRNA +RF01167 snRNA +RF01168 snRNA +RF01169 snRNA +RF01170 snRNA +RF01171 snRNA +RF01172 snRNA +RF01173 snRNA +RF01174 snRNA +RF01175 snRNA +RF01176 snRNA +RF01177 snRNA +RF01178 snRNA +RF01179 snRNA +RF01180 snRNA +RF01181 snRNA +RF01182 snRNA +RF01183 snRNA +RF01184 snRNA +RF01185 snRNA +RF01186 snRNA +RF01188 snRNA +RF01189 snRNA +RF01190 snRNA +RF01191 snRNA +RF01192 snRNA +RF01193 snRNA +RF01194 snRNA +RF01195 snRNA +RF01196 snRNA +RF01197 snRNA +RF01198 snRNA +RF01199 snRNA +RF01200 snRNA +RF01201 snRNA +RF01202 snRNA +RF01203 snRNA +RF01204 snRNA +RF01205 snRNA +RF01206 snRNA +RF01207 snRNA +RF01208 snRNA +RF01209 snRNA +RF01210 snRNA +RF01211 snRNA +RF01212 snRNA +RF01213 snRNA +RF01214 snRNA +RF01215 snRNA +RF01216 snRNA +RF01218 snRNA +RF01219 snRNA +RF01220 snRNA +RF01221 snRNA +RF01222 snRNA +RF01223 snRNA +RF01224 snRNA +RF01225 snRNA +RF01226 snRNA +RF01227 snRNA +RF01228 snRNA +RF01229 snRNA +RF01230 snRNA +RF01231 snRNA +RF01232 snRNA +RF01233 snRNA +RF01234 snRNA +RF01235 snRNA +RF01236 snRNA +RF01237 snRNA +RF01238 snRNA +RF01239 snRNA +RF01240 snRNA +RF01241 snRNA +RF01242 snRNA +RF01243 snRNA +RF01244 snRNA +RF01245 snRNA +RF01246 snRNA +RF01247 snRNA +RF01248 snRNA +RF01249 snRNA +RF01250 snRNA +RF01251 snRNA +RF01252 snRNA +RF01253 snRNA +RF01254 snRNA +RF01255 snRNA +RF01256 snRNA +RF01257 snRNA +RF01258 snRNA +RF01259 snRNA +RF01260 snRNA +RF01261 snRNA +RF01262 snRNA +RF01263 snRNA +RF01264 snRNA +RF01265 snRNA +RF01266 snRNA +RF01267 snRNA +RF01268 snRNA +RF01269 snRNA +RF01270 snRNA +RF01271 snRNA +RF01272 snRNA +RF01273 snRNA +RF01274 snRNA +RF01275 snRNA +RF01276 snRNA +RF01277 snRNA +RF01278 snRNA +RF01279 snRNA +RF01280 snRNA +RF01281 snRNA +RF01283 snRNA +RF01284 snRNA +RF01285 snRNA +RF01286 snRNA +RF01287 snRNA +RF01288 snRNA +RF01289 snRNA +RF01290 snRNA +RF01291 snRNA +RF01292 snRNA +RF01293 snRNA +RF01294 snRNA +RF01295 snRNA +RF01296 snRNA +RF01297 snRNA +RF01298 snRNA +RF01299 snRNA +RF01300 snRNA +RF01301 snRNA +RF01302 snRNA +RF01303 snRNA +RF01304 snRNA +RF01305 snRNA +RF01306 snRNA +RF01307 snRNA +RF01308 snRNA +RF01309 snRNA +RF01310 snRNA +RF01311 snRNA +RF01312 snRNA +RF01420 snRNA +RF01421 snRNA +RF01422 snRNA +RF01423 snRNA +RF01424 snRNA +RF01425 snRNA +RF01426 snRNA +RF01427 snRNA +RF01428 snRNA +RF01429 snRNA +RF01430 snRNA +RF01431 snRNA +RF01432 snRNA +RF01433 snRNA +RF01434 snRNA +RF01435 snRNA +RF01436 snRNA +RF01437 snRNA +RF01438 snRNA +RF01439 snRNA +RF01440 snRNA +RF01441 snRNA +RF01442 snRNA +RF01443 snRNA +RF01444 snRNA +RF01445 snRNA +RF01446 snRNA +RF01447 snRNA +RF01448 snRNA +RF01449 snRNA +RF01450 snRNA +RF01451 snRNA +RF01452 snRNA +RF01498 snRNA +RF01499 snRNA +RF01500 snRNA +RF01501 snRNA +RF01505 snRNA +RF01506 snRNA +RF01507 snRNA +RF01509 snRNA +RF01511 snRNA +RF01513 snRNA +RF01514 snRNA +RF01515 snRNA +RF01516 snRNA +RF01522 snRNA +RF01523 snRNA +RF01524 snRNA +RF01525 snRNA +RF01526 snRNA +RF01531 snRNA +RF01532 snRNA +RF01533 snRNA +RF01534 snRNA +RF01535 snRNA +RF01536 snRNA +RF01537 snRNA +RF01538 snRNA +RF01539 snRNA +RF01540 snRNA +RF01541 snRNA +RF01542 snRNA +RF01543 snRNA +RF01544 snRNA +RF01545 snRNA +RF01546 snRNA +RF01547 snRNA +RF01548 snRNA +RF01549 snRNA +RF01550 snRNA +RF01551 snRNA +RF01552 snRNA +RF01553 snRNA +RF01554 snRNA +RF01555 snRNA +RF01556 snRNA +RF01557 snRNA +RF01558 snRNA +RF01559 snRNA +RF01560 snRNA +RF01561 snRNA +RF01562 snRNA +RF01563 snRNA +RF01564 snRNA +RF01565 snRNA +RF01566 snRNA +RF01567 snRNA +RF01568 snRNA +RF01569 snRNA +RF01570 snRNA +RF01572 snRNA +RF01573 snRNA +RF01574 snRNA +RF01575 snRNA +RF01576 snRNA +RF01583 snRNA +RF01584 snRNA +RF01585 snRNA +RF01586 snRNA +RF01587 snRNA +RF01588 snRNA +RF01589 snRNA +RF01590 snRNA +RF01591 snRNA +RF01592 snRNA +RF01593 snRNA +RF01594 snRNA +RF01595 snRNA +RF01596 snRNA +RF01597 snRNA +RF01598 snRNA +RF01599 snRNA +RF01600 snRNA +RF01601 snRNA +RF01602 snRNA +RF01603 snRNA +RF01604 snRNA +RF01605 snRNA +RF01606 snRNA +RF01607 snRNA +RF01608 snRNA +RF01609 snRNA +RF01610 snRNA +RF01611 snRNA +RF01612 snRNA +RF01613 snRNA +RF01614 snRNA +RF01615 snRNA +RF01617 snRNA +RF01618 snRNA +RF01620 snRNA +RF01621 snRNA +RF01622 snRNA +RF01624 snRNA +RF01625 snRNA +RF01626 snRNA +RF01627 snRNA +RF01628 snRNA +RF01629 snRNA +RF01630 snRNA +RF01631 snRNA +RF01632 snRNA +RF01633 snRNA +RF01634 snRNA +RF01635 snRNA +RF01636 snRNA +RF01637 snRNA +RF01638 snRNA +RF01639 snRNA +RF01640 snRNA +RF01641 snRNA +RF01642 snRNA +RF01644 snRNA +RF01645 snRNA +RF01646 snRNA +RF01647 snRNA +RF01648 snRNA +RF01649 snRNA +RF01650 snRNA +RF01651 snRNA +RF01652 snRNA +RF01653 snRNA +RF01654 snRNA +RF01655 snRNA +RF01658 snRNA +RF01659 snRNA +RF01660 snRNA +RF01661 snRNA +RF01662 snRNA +RF01664 snRNA +RF01802 snRNA +RF01829 snRNA +RF01844 snRNA +RF01846 snRNA +RF01847 snRNA +RF01848 snRNA +RF01860 snRNA +RF01861 snRNA +RF01862 snRNA +RF01863 snRNA +RF01864 snRNA +RF01866 snRNA +RF02163 snRNA +RF00014 sRNA +RF00018 sRNA +RF00021 sRNA +RF00034 sRNA +RF00035 sRNA +RF00057 sRNA +RF00077 sRNA +RF00078 sRNA +RF00079 sRNA +RF00081 sRNA +RF00082 sRNA +RF00083 sRNA +RF00084 sRNA +RF00101 sRNA +RF00110 sRNA +RF00111 sRNA +RF00112 sRNA +RF00113 sRNA +RF00115 sRNA +RF00116 sRNA +RF00117 sRNA +RF00118 sRNA +RF00119 sRNA +RF00121 sRNA +RF00122 sRNA +RF00124 sRNA +RF00125 sRNA +RF00126 sRNA +RF00128 sRNA +RF00166 sRNA +RF00195 sRNA +RF00368 sRNA +RF00369 sRNA +RF00370 sRNA +RF00371 sRNA +RF00372 sRNA +RF00378 sRNA +RF00444 sRNA +RF00505 sRNA +RF00519 sRNA +RF00615 sRNA +RF00616 sRNA +RF01116 sRNA +RF01385 sRNA +RF01386 sRNA +RF01387 sRNA +RF01388 sRNA +RF01389 sRNA +RF01390 sRNA +RF01391 sRNA +RF01392 sRNA +RF01393 sRNA +RF01394 sRNA +RF01395 sRNA +RF01396 sRNA +RF01397 sRNA +RF01398 sRNA +RF01399 sRNA +RF01400 sRNA +RF01401 sRNA +RF01402 sRNA +RF01403 sRNA +RF01404 sRNA +RF01405 sRNA +RF01406 sRNA +RF01407 sRNA +RF01408 sRNA +RF01409 sRNA +RF01410 sRNA +RF01411 sRNA +RF01412 sRNA +RF01457 sRNA +RF01459 sRNA +RF01460 sRNA +RF01461 sRNA +RF01462 sRNA +RF01463 sRNA +RF01464 sRNA +RF01465 sRNA +RF01466 sRNA +RF01467 sRNA +RF01468 sRNA +RF01469 sRNA +RF01470 sRNA +RF01471 sRNA +RF01472 sRNA +RF01473 sRNA +RF01474 sRNA +RF01476 sRNA +RF01477 sRNA +RF01478 sRNA +RF01479 sRNA +RF01487 sRNA +RF01488 sRNA +RF01489 sRNA +RF01492 sRNA +RF01493 sRNA +RF01494 sRNA +RF01496 sRNA +RF01503 sRNA +RF01504 sRNA +RF01512 sRNA +RF01519 sRNA +RF01520 sRNA +RF01521 sRNA +RF01527 sRNA +RF01528 sRNA +RF01529 sRNA +RF01530 sRNA +RF01571 sRNA +RF01578 sRNA +RF01579 sRNA +RF01580 sRNA +RF01581 sRNA +RF01582 sRNA +RF01619 sRNA +RF01623 sRNA +RF01643 sRNA +RF01656 sRNA +RF01663 sRNA +RF01665 sRNA +RF01668 sRNA +RF01669 sRNA +RF01670 sRNA +RF01671 sRNA +RF01672 sRNA +RF01673 sRNA +RF01674 sRNA +RF01675 sRNA +RF01676 sRNA +RF01677 sRNA +RF01678 sRNA +RF01679 sRNA +RF01680 sRNA +RF01681 sRNA +RF01682 sRNA +RF01683 sRNA +RF01684 sRNA +RF01685 sRNA +RF01686 sRNA +RF01687 sRNA +RF01690 sRNA +RF01691 sRNA +RF01693 sRNA +RF01694 sRNA +RF01696 sRNA +RF01698 sRNA +RF01699 sRNA +RF01700 sRNA +RF01701 sRNA +RF01702 sRNA +RF01703 sRNA +RF01705 sRNA +RF01706 sRNA +RF01710 sRNA +RF01712 sRNA +RF01714 sRNA +RF01718 sRNA +RF01719 sRNA +RF01722 sRNA +RF01723 sRNA +RF01728 sRNA +RF01732 sRNA +RF01742 sRNA +RF01757 sRNA +RF01762 sRNA +RF01775 sRNA +RF01781 sRNA +RF01782 sRNA +RF01783 sRNA +RF01784 sRNA +RF01789 sRNA +RF01791 sRNA +RF01793 sRNA +RF01796 sRNA +RF01808 sRNA +RF01810 sRNA +RF01812 sRNA +RF01814 sRNA +RF01815 sRNA +RF01816 sRNA +RF01817 sRNA +RF01818 sRNA +RF01819 sRNA +RF01820 sRNA +RF01821 sRNA +RF01822 sRNA +RF01823 sRNA +RF01827 sRNA +RF01828 sRNA +RF01858 sRNA +RF01867 sRNA +RF02029 sRNA +RF02030 sRNA +RF02031 sRNA +RF02049 sRNA +RF02050 sRNA +RF02051 sRNA +RF02052 sRNA +RF02053 sRNA +RF02054 sRNA +RF02055 sRNA +RF02056 sRNA +RF02057 sRNA +RF02059 sRNA +RF02060 sRNA +RF02062 sRNA +RF02063 sRNA +RF02064 sRNA +RF02065 sRNA +RF02066 sRNA +RF02067 sRNA +RF02070 sRNA +RF02071 sRNA +RF02072 sRNA +RF02073 sRNA +RF02074 sRNA +RF02075 sRNA +RF02077 sRNA +RF02078 sRNA +RF02079 sRNA +RF02080 sRNA +RF02081 sRNA +RF02082 sRNA +RF02099 sRNA +RF02100 sRNA +RF02151 sRNA +RF02221 sRNA +RF02222 sRNA +RF02223 sRNA +RF02224 sRNA +RF02225 sRNA +RF02226 sRNA +RF02227 sRNA +RF02228 sRNA +RF02230 sRNA +RF02231 sRNA +RF02232 sRNA +RF02233 sRNA +RF02234 sRNA +RF02235 sRNA +RF02236 sRNA +RF02237 sRNA +RF02238 sRNA +RF02239 sRNA +RF02240 sRNA +RF02241 sRNA +RF02242 sRNA +RF02243 sRNA +RF02268 sRNA +RF02269 sRNA +RF00127 sRNA +RF01852 tRNA +RF00005 tRNA diff -r 2ed1e1728299 -r 9dcffd531c76 filterReadsByCount.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterReadsByCount.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,116 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2010-01 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","o=s","mark:s","h"); +if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample"; +my @mark=split /\#/,$mark; + +open OUT,">$opts{o}"; +open IN,"<$opts{i}"; +my %hash;my %reads; +while (my $aline=) { + chomp $aline; + my $seq=; + chomp $seq; + if($aline=~/:([\d|_]+)_x(\d+)$/){ + if ($2>3) { + my @ss=split/_/,$1; + for (my $i=0;$i<@ss;$i++) { + $hash{length($seq)}[$i]++ if($ss[$i]>0); + $hash{length($seq)}[$i] +=0 if($ss[$i]==0); + $reads{length($seq)}[$i]+=$ss[$i]; + } + print OUT "$aline\n$seq\n"; + } + } +} +close IN; +close OUT; + +my $dir=dirname($opts{'o'}); +chdir $dir; +my $lengthfile=$dir."/reads_length_distribution_after_count_filter.txt"; +open OUT, ">$lengthfile"; +open R,">$dir/length_distribution_after_count_filter.R"; + +print OUT "Tags length\t@mark\n"; + +my $samNo=@mark; +my $avalue=""; +my @length=sort{$a<=>$b} keys %hash; +foreach (@length) { + print OUT $_,"\t@{$hash{$_}}\n"; + my $vv=join ", ",@{$hash{$_}}; + $avalue .="$vv,"; +} +$avalue =~s/,$//; +my $lengths=join ",",@length; +my $marks=join "\",\"",@mark; + +print R "a<-c($avalue) +b<-matrix(a,ncol=$samNo,byrow=T) +cl<-colors() +names=c($lengths) +legends=c(\"$marks\") +png(\"Tags_length_after_count_filter.png\",width=800,height=600) +barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution After Count Filter\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") +abline(h=0) +dev.off() + +"; +$avalue=""; +print OUT "\nReads length\t@mark\n"; +foreach (@length) { + print OUT $_,"\t@{$reads{$_}}\n"; + my $vv=join ", ", @{$reads{$_}}; + $avalue .= "$vv,"; +} +$avalue =~s/,$//; + +print R "a<-c($avalue)\n +b<-matrix(a,ncol=$samNo,byrow=T) + +png(\"Reads_length_after_count_filter.png\",width=800,height=600) +barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution After Count Filter\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") +abline(h=0) +dev.off() + +"; +close OUT; +close R; + +system ("R CMD BATCH $dir/length_distribution_after_count_filter.R"); + +#system ("rm $dir/length_distribution.R"); +#system ("rm $dir/length_distribution.Rout"); +#system ("rm $dir/.RData"); +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -min -max -mark +options: + +-i input file +-o output file +-mark string #sample name eg: samA#samB#samC +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 filterReadsByLength_1.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filterReadsByLength_1.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,122 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2010-01 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","min=i","max=i","o=s","mark:s","h"); +if (!(defined $opts{i} and defined $opts{o} and defined $opts{min} and defined $opts{max}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $mark=defined $opts{'mark'} ? $opts{'mark'} : "Sample"; +my @mark=split /\#/,$mark; + + +open OUT,">$opts{o}"; +open IN,"<$opts{i}"; +my %hash;my %reads; +while (my $aline=) { + chomp $aline; + my $seq=; + chomp $seq; + + if($aline=~/:([\d|_]+)_x(\d+)$/){ + my @ss=split/_/,$1; + for (my $i=0;$i<@ss;$i++) { + $hash{length($seq)}[$i]++ if($ss[$i]>0); + $hash{length($seq)}[$i] +=0 if($ss[$i]==0); + $reads{length($seq)}[$i]+=$ss[$i]; + } + } + #else{$reads{length($seq)}+=1;} + if (length ($seq)>=$opts{'min'} && length ($seq) <=$opts{'max'}) { + print OUT "$aline\n$seq\n"; + } +} +close IN; +close OUT; + +my $dir=dirname($opts{'o'}); +chdir $dir; +my $lengthfile=$dir."/reads_length_distribution.txt"; +print "$lengthfile\n"; +open OUT, ">$lengthfile"; +open R,">$dir/length_distribution.R"; + +print OUT "Tags length\t@mark\n"; + +my $samNo=@mark; +my $avalue=""; +my @length=sort{$a<=>$b} keys %hash; +foreach (@length) { + print OUT $_,"\t@{$hash{$_}}\n"; + my $vv=join ", ",@{$hash{$_}}; + $avalue .="$vv,"; +} +$avalue =~s/,$//; +my $lengths=join ",",@length; +my $marks=join "\",\"",@mark; + +print R "a<-c($avalue) +b<-matrix(a,ncol=$samNo,byrow=T) +cl<-colors() +names=c($lengths) +legends=c(\"$marks\") +png(\"Tags_length.png\",width=800,height=600) +barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Tags Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") +abline(h=0) +dev.off() + +"; +$avalue=""; +print OUT "\nReads length\t@mark\n"; +foreach (@length) { + print OUT $_,"\t@{$reads{$_}}\n"; + my $vv=join ", ", @{$reads{$_}}; + $avalue .= "$vv,"; +} +$avalue =~s/,$//; + +print R "a<-c($avalue)\n +b<-matrix(a,ncol=$samNo,byrow=T) + +png(\"Reads_length.png\",width=800,height=600) +barplot(t(b),beside=TRUE,col=cl[1:$samNo],main=\"Reads Length Distribution\",names.arg=names,ylim=c(0,max(a)),legend.text=legends,args.legend=\"topleft\") +abline(h=0) +dev.off() + +"; +close OUT; +close R; + +system ("R CMD BATCH $dir/length_distribution.R"); + +#system ("rm $dir/length_distribution.R"); +#system ("rm $dir/length_distribution.Rout"); +#system ("rm $dir/.RData"); +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -min -max -mark +options: + +-i input file +-o output file +-min reads min length. +-max reads max length. +-mark string #sample name eg: samA#samB#samC +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 get_genelist.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_genelist.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,62 @@ +#!/usr/bin/perl -w +#Filename: +#Author: chentt +#Email: +#Date: 2012-4-6 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments +&usage; +} +open IN,"<$opts{i}"; +open OUT ,">$opts{o}"; +print OUT "#ID\tchr\tstart\tend\tstrand\n"; +my $n=1; +my %gene1; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @tmp=split/\t/,$aline; + my $ID; + if ($tmp[2] eq "gene") { + $tmp[0]=~s/Chr\./Chr/; + #$tmp[0]=~s/Chr/chr/; + my @infor=split/;/,$tmp[8]; + for (my $i=0;$i<@infor ;$i++) { + if ($infor[$i]=~/Alias\=(\S+)$/) { + $ID=$1; + last; + } + else { + $ID="unknown$n"; + $n++; + } + } + #$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] + push @{$gene1{$ID}},[$tmp[3],$tmp[4],$tmp[0]]; + print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; + } +} +close IN; +close OUT; + + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -h +options: +-i input cluster file +-o output file +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 html.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/html.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,788 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2014-5-29 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","format=s","o=s","h"); +if (!(defined $opts{o} and defined $opts{format} and defined $opts{i} ) || defined $opts{h}) { #necessary arguments +&usage; +} +my ($config,$prepath,$rfampath,$genomepath,$clusterpath,$annotatepath,$plotpath,$degpath); +my ($predir,$rfamdir,$genomedir,$clusterdir,$annotatedir,$plotdir,$degdir); +open IN,"<$opts{i}"; +$config=; chomp $config; +$prepath=; chomp $prepath; +$rfampath=;chomp $rfampath; +$genomepath=; chomp $genomepath; +$clusterpath=; chomp $clusterpath; +$annotatepath=; chomp $annotatepath; +$plotpath=; chomp $plotpath; +my $deg_tag=1; +if(eof){$deg_tag=0;} +else{ + $degpath=; chomp $degpath; +} +close IN; +my @tmp=split/\//,$prepath; +$predir=$tmp[-1]; +@tmp=split/\//,$rfampath; +$rfamdir=$tmp[-1]; +@tmp=split/\//,$genomepath; +$genomedir=$tmp[-1]; +@tmp=split/\//,$clusterpath; +$clusterdir=$tmp[-1]; +@tmp=split/\//,$annotatepath; +$annotatedir=$tmp[-1]; +@tmp=split/\//,$plotpath; +$plotdir=$tmp[-1]; + +my $dir=dirname($opts{'o'}); + +open OUT ,">$opts{'o'}"; +print OUT "\n \n Analysis Report \n + \n

\n \n Small RNA Analysis Report\n \n

+

1. Sequence No. and quality

+

1.1 Sequece No.

+"; + +### raw data no +open IN,"<$config"; +my @files;my @marks; my @rawNo; +while (my $aline=) { + chomp $aline; + my @tmp=split/\t/,$aline; + push @files,$tmp[0]; + + my $no=`less $tmp[0] |wc -l `; + chomp $no; + if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { + $no=$no/4; + } + else{ + $no=$no/2; + } + push @rawNo,$no; + + push @marks,$tmp[1]; +} +close IN; + +### preprocess +unless ($prepath=~/\/$/) { + $prepath .="/"; +} + +my @trimNo;my @collapse; +my $collapsefile=$prepath."collapse_reads.fa"; +open IN,"<$collapsefile"; +while (my $aline=) { + chomp $aline; + ; + $aline=~/:([\d|_]+)_x(\d+)$/; + my @lng=split/_/,$1; + for (my $i=0;$i<@lng;$i++) { + if ($lng[$i]>0) { + $trimNo[$i] +=$lng[$i]; + $collapse[$i] ++; + } + } +} +close IN; + +my @cleanR;my @cleanT; +my $clean=$prepath."collapse_reads_18-40.fa"; +open IN,"<$clean"; +while (my $aline=) { + chomp $aline; + ; + $aline=~/:([\d|_]+)_x(\d+)$/; + my @lng=split/_/,$1; + for (my $i=0;$i<@lng;$i++) { + if ($lng[$i]>0) { + $cleanR[$i] +=$lng[$i]; + $cleanT[$i] ++; + } + } +} +close IN; + +my @filterR;my @filterT; +my $filter=$prepath."collapse_reads_out.fa"; +open IN,"<$filter"; +while (my $aline=) { + chomp $aline; + ; + $aline=~/:([\d|_]+)_x(\d+)$/; + my @lng=split/_/,$1; + for (my $i=0;$i<@lng;$i++) { + if ($lng[$i]>0) { + $filterR[$i] +=$lng[$i]; + $filterT[$i] ++; + } + } +} +close IN; + + +print OUT " + + +"; +foreach (@marks) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@rawNo) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@trimNo) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@collapse) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@cleanR) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@cleanT) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@filterR) { + print OUT "\n"; +} +print OUT " + + +"; +foreach (@filterT) { + print OUT "\n"; +} +print OUT "\n
  $_
Raw Reads No. $_
Reads No. After Trimed 3\' adapter $_
Unique Tags No. $_
Clean Reads No. $_
Clean Tags No. $_
Filter Reads No. \(reads count \>3\) $_
Filter Tags No. \(reads count \>3\) $_
"; +print OUT "

+Note:
+The raw data file path is: $files[0]
+"; +for (my $i=1;$i<@files;$i++) { + print OUT "          $files[$i]
"; +} +print OUT "The collapsed file path is: $collapsefile
+The clean data file path is: $clean
+The filter (remain total reads>3) data file path is: $filter
+

+

1. Sequence length count

+"; +print OUT "\n"; + +my $length=$prepath."length.html"; +open IN,"<$length"; +while (my $aline=) { + chomp $aline; + print OUT "$aline\n"; +} +close IN; + +print OUT "

Note:
The sequence length data: length file +

+"; + +#### rfam +unless ($rfampath=~/\/$/) { + $rfampath .="/"; +} +unless ($genomepath=~/\/$/) { + $genomepath .="/"; +} +print OUT "

2. Rfam non-miRNA annotation

+

2.1 Reads count

+ + +"; + +my @rfamR; my @rfamT; +my $tag=1; +open IN,"<$dir/rfam_match/rfam_non-miRNA_annotation.txt"; +while (my $aline=) { + chomp $aline; + $tag=0 if($aline=~/tags\s+number/); + next if($aline=~/^\#/); + next if($aline=~/^\s*$/); + my @tmp=split/\s+/,$aline; + if($tag == 1){push @rfamR,[@tmp];} + else{push @rfamT,[@tmp];} +} +close IN; + + +print OUT "\n"; +foreach (@marks) { + print OUT "\n"; +} +for (my $i=0;$i<@rfamR;$i++) { + print OUT " + + + "; + for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { + print OUT "\n"; + } +} + +print OUT "\n
RNA Name $_
$rfamR[$i][0] $rfamR[$i][$j]
+

2.2 Tags count

+ + + \n"; +foreach (@marks) { + print OUT "\n"; +} +for (my $i=0;$i<@rfamT;$i++) { + print OUT " + + + "; + for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { + print OUT "\n"; + } +} +print OUT "\n
RNA Name $_
$rfamT[$i][0] $rfamT[$i][$j]
+

Note:
The rfam mapping results is: $rfampath"; +print OUT "rfam_mapped.bwt

"; + +open IN,"<$dir/genome_match/genome_mapped.bwt"; +my @genome_r_u; +my @genome_r_m; +my @genome_t_u; +my @genome_t_m; +my $tags_map_number=0; +while (my $aline=) { + chomp $aline; + my @temp=split/\t/,$aline; + if ($temp[6]==0) { + $aline=~/:([\d|_]+)_x(\d+)/; + my @lng=split/_/,$1; + for (my $i=0;$i<@lng;$i++) { + if ($lng[$i]>0) { + $genome_r_u[$i] +=$lng[$i]; + $genome_t_u[$i] ++; + } + } + $tags_map_number++; + } + if ($temp[6]>0) { + $aline=~/:([\d|_]+)_x(\d+)/; + my @lng=split/_/,$1; + for (my $i=0;$i<@lng;$i++) { + if ($lng[$i]>0) { + $genome_r_m[$i] +=$lng[$i]; + $genome_t_m[$i] ++; + } + } + for (my $i=0;$i<$temp[6] ;$i++) { + my $next=; + } + $tags_map_number++; + } +} +close IN; +#

3.1 Reads count

+# +# +print OUT "

3. genome mapping result

+
+ +\n +"; +foreach (@marks) { + print OUT "\n"; +} +print OUT " + + +"; +for (my $i=0;$i<@genome_r_u ;$i++) { + print OUT "\n"; +} + +print OUT " + + +"; +for (my $i=0;$i<@genome_t_u ;$i++) { + print OUT "\n"; +} + +print OUT " + + +"; +for (my $i=0;$i<@genome_r_m ;$i++) { + print OUT "\n"; +} + +print OUT " + + +"; +for (my $i=0;$i<@genome_t_m ;$i++) { + print OUT "\n"; +} + +print OUT "\n
Map $_
Uniq Map Reads No. $genome_r_u[$i]
Uniq Map Tags No. $genome_t_u[$i]
Multiple Map Reads No. $genome_r_m[$i]
Multiple Map Tags No. $genome_t_m[$i]
+

Note:
The genome mapping results is: $genomepath"; +print OUT "genome_mapped.bwt

"; + +my $cluster="$clusterpath/sample_reads.cluster"; +my $cluster_number=`less $cluster |wc -l `; +$cluster_number=$cluster_number-1; +my (%cluster_length,@exp,@rpkm); +my @exp_range=qw(0 \(0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**\)); +my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]); + +open IN,"<$cluster"; +while (my $aline=) { + next if($aline=~/^\"/); + chomp $aline; + my @temp=split/\t/,$aline; + my @id=split/:|-/,$temp[0]; + $cluster_length{$id[2]-$id[1]+1}++; + for (my $i=0;$i<@marks ;$i++) { + if ($temp[$i+3] == 0) {$exp[0][$i]++;} + elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[1][$i]++;} + elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;} + elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;} + elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;} + elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;} + elsif ($temp[$i+3]>100000){$exp[6][$i]++;} + } +} +close IN; + +my $cluster_rpkm="$clusterpath/sample_rpkm.cluster"; +open IN,"<$cluster_rpkm"; +while (my $aline=) { + next if($aline=~/^\#/); + chomp $aline; + my @temp=split/\t/,$aline; + for (my $i=0;$i<@marks ;$i++) { + if ($temp[$i+3]==0) {$rpkm[0][$i]++;} + elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;} + elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;} + elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;} + elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;} + elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;} + elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;} + elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;} + elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;} + elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;} + else{$rpkm[10][$i]++;} + } +} + +close IN; + +my $cluster_length_file="$clusterpath/cluster_length.txt"; +open LEN,">$cluster_length_file"; +print LEN "\#length\tcluster_number\n"; +foreach my $key (sort keys %cluster_length) { + print LEN "$key\t$cluster_length{$key}\n"; +} +close LEN; +print OUT "

4. cluster result

+

4.1 Cluster count

+ + + + + + + + + +\n
Merged samples
Tags number$tags_map_number
Cluster number$cluster_number
+"; + +print OUT "

4.2 Cluster length

+

Note:
The clusters length data: length file +

+"; +print OUT "

4.3 Quantify

+ + +\n +"; +foreach (@marks) { + print OUT "\n"; +} +for (my $i=0;$i<@exp_range;$i++) { + print OUT " + + + "; + for (my $j=0;$j<@marks ;$j++) { + if (!(defined($exp[$i][$j]))) { + print OUT "\n"; + } + else{print OUT "\n";} + } +} +print OUT "\n
Reads Range $_
$exp_range[$i] 0 $exp[$i][$j]
"; + +print OUT "\n + +\n +"; +foreach (@marks) { + print OUT "\n"; +} +for (my $i=0;$i<@rpkm_range;$i++) { + print OUT " + + + "; + for (my $j=0;$j<@marks ;$j++) { + if (!(defined($rpkm[$i][$j]))) { + print OUT "\n"; + } + else{print OUT "\n";} + } +} +print OUT "\n
RPKM Range $_
$rpkm_range[$i] 0 $rpkm[$i][$j]
"; + +my $annotate="$annotatepath/sample_c_p.anno"; +my (%posit,%repeat,%nat1,%nat2); +my (@phase,@long,@repeat,@nat); +for (my $j=0;$j<@marks ;$j++) { + $phase[$j]=0; + $long[$j]=0; + $repeat[$j]=0; + $nat[$j]=0; +} + +my $class_anno=1; +open ANNO,"<$annotate"; +while (my $aline=) { + chomp $aline; + my @temp=split/\t/,$aline; + if($aline=~/^\#/){ + if (@temp != 10+@marks) { + $class_anno=0; + } + next; + } + for (my $i=3+@marks+$class_anno;$i<@temp;$i++) { + my @posit=split/\;/,$temp[$i]; + for (my $j=0;$j<@marks ;$j++) { + if ($temp[3+$j]>0) { + $posit{$posit[0]}[$j]++; + } + else{ + if (!(defined($posit{$posit[0]}[$j]))) { + $posit{$posit[0]}[$j]=0; + } + } + } + } + if ($class_anno) { + for (my $j=0;$j<@marks ;$j++) { + if ($temp[3+$j]>0) { + if ($temp[6] eq "phase") { + $phase[$j]++; + } + if ($temp[7] eq "long") { + $long[$j]++; + } + if ($temp[8] ne "\/") { + $repeat[$j]++; + my @rr=split/\;/,$temp[8]; + foreach (@rr) { + $repeat{$_}[$j]++; + } + } + if ($temp[9] ne "\/") { + $nat[$j]++; + my @nn1=split/\;/,$temp[9]; + my @nn2=split/\;/,$temp[10]; + for (my $k=0;$k<@nn1 ;$k++) { + $nat1{$nn1[$k]}[$j]++; + $nat2{$nn2[$k]}[$j]++; + } + } + } + } + } +} +close ANNO; + +print OUT "

5. Cluster Annotate

+

5.1 Cluster genome position annotate

+ + +\n +"; + +foreach (@marks) { + print OUT "\n"; +} +foreach my $key (sort keys %posit) { + print OUT " + + + "; + foreach (@{$posit{$key}}) { + print OUT "\n"; + } +} +print OUT "\n
clusters number $_
$key $_
"; +print OUT "

+Note:
+One cluster mybe annotate to multiple genes
+"; + +if ($class_anno) { + print OUT "

5.2 Cluster source mechanism annotate

+ + + \n + "; + + foreach (@marks) { + print OUT "\n"; + } + print OUT " + + \n + "; + foreach (@phase) { + print OUT "\n"; + } + + print OUT " + + \n + "; + foreach (@long) { + print OUT "\n"; + } + + print OUT " + + \n + "; + foreach (@repeat) { + print OUT "\n"; + } + + print OUT " + + \n + "; + foreach (@nat) { + print OUT "\n"; + } + print OUT "\n
clusters number $_
Phase $_
Long $_
Repeat $_
Nat $_
"; + + print OUT "

+ Repeat subclass annotate: + "; + + print OUT " + + \n + "; + foreach (@marks) { + print OUT "\n"; + } + + foreach my $key (sort keys %repeat) { + print OUT " + + + "; + for (my $i=0;$i<@marks ;$i++) { + if (defined($repeat{$key}[$i])) { + print OUT "\n"; + } + else{print OUT "\n";} + } + } + print OUT "\n
Repeat subclass $_
$key $repeat{$key}[$i] 0
"; + + + print OUT "

+ Nat subclass1 annotate: + "; + + print OUT " + + \n + "; + foreach (@marks) { + print OUT "\n"; + } + foreach my $key (sort keys %nat1) { + print OUT " + + + "; + for (my $i=0;$i<@marks ;$i++) { + if (defined($nat1{$key}[$i])) { + print OUT "\n"; + } + else{print OUT "\n";} + } + } + print OUT "\n
Nat subclass1 $_
$key $nat1{$key}[$i] 0
"; + + print OUT "

+ Nat subclass2 annotate: + "; + + print OUT " + + \n + "; + foreach (@marks) { + print OUT "\n"; + } + foreach my $key (sort keys %nat2) { + print OUT " + + + "; + for (my $i=0;$i<@marks ;$i++) { + if (defined($nat2{$key}[$i])) { + print OUT "\n"; + } + else{print OUT "\n";} + } + } + print OUT "\n
Nat subclass2 $_
$key $nat2{$key}[$i] 0
"; + print OUT "

+ Note:
+ One cluster mybe annotate to multiple repeats or nats
+ "; +} +else { + print OUT "

5.2 Cluster source mechanism annotate

+
Do not do source mechanism annotate
"; + +} + +print OUT "

6. Graph of Clusters of all samples

\n"; + +my $plot=$plotpath."cluster.html"; +open IN,"<$plot"; +while (my $aline=) { + chomp $aline; + print OUT "$aline\n"; +} +close IN; + + +if ($deg_tag) { + my $deg_file=`ls $degpath`; + chomp $deg_file; + my @deg_file=split/\n/,$deg_file; + my %deg; + foreach (@deg_file) { + my $output="$degpath/$_/output_score.txt"; + open IN,"<$output"; + $deg{$_}[0]=0; + $deg{$_}[1]=0; + $deg{$_}[2]=0; + while (my $aline=) { + next if ($aline=~/^\"/); + chomp $aline; + my @temp=split/\t/,$aline; + if ($temp[9] eq "TRUE") { + $deg{$_}[0]++; + if ($temp[4] >0) { + $deg{$_}[1]++; + } + if ($temp[4] <0) { + $deg{$_}[2]++; + } + } + } + close IN; + } + + print OUT "

7. DEG

+ + + \n + \n + \n + \n + "; + + foreach my $key (sort keys %deg) { + print OUT " + + + "; + for (my $i=0;$i<@{$deg{$key}} ;$i++) { + print OUT "\n"; + } + } + print OUT "\n
Genes number DEG UP DOWN
$key $deg{$key}[$i]
"; +} +else{ + print OUT "

7. DEG

+
Do not do DE clusters
"; +} + +print OUT " + + +"; +close OUT; + + + + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -o +options: +-i +-format +-o output file +-h help +USAGE +exit(1); +} diff -r 2ed1e1728299 -r 9dcffd531c76 install_DEG.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/install_DEG.R Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,2 @@ +source("http://bioconductor.org/biocLite.R") +biocLite("DEGseq") diff -r 2ed1e1728299 -r 9dcffd531c76 matching.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matching.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,71 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2013/7/19 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +unless ($fileout=~/\/$/) {$fileout.="/";} +my $genome=$opts{'g'}; +my $mis=defined $opts{'v'}? $opts{'v'} : 0; +my $hits=defined $opts{'r'}? $opts{'r'} : 25; +my $index=defined $opts{'index'} ? $opts{'index'} : ""; +my $threads=defined $opts{'p'} ? $opts{'p'} : 1; + + +#my $time=time(); +#my $mapdir=$fileout."/genome_match_".$time; +my $mapdir=$fileout."/genome_match"; +mkdir $mapdir; +chdir $mapdir; +###check genome index +if (-s $index.".1.ebwt") { +}else{ + `bowtie-build $genome genome`; + $index="genome"; +} + +### genome mapping +`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa > genome_mapped.bwt 2> run.log`; + +#`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`; + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file# input reads fasta/fastq file +-g input file# genome file +-index file-prefix #(must be indexed by bowtie-build) The parameter + string must be the prefix of the bowtie index. For instance, if + the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then + the prefix is 'h_sapiens_37_asm'.##can be null +-v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; + +-p/--threads number of alignment threads to launch (default: 1) + +-r int a read is allowed to map up to this number of positions in the genome + default is 25 + +-o output directory + +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 nibls.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/nibls.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,319 @@ +#!/usr/bin/perl +##################################################################################################### +#LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it. +# +# Dan MacLean +# dan.maclean@sainsbury-laboratory.ac.uk +# +# This program is free academic software; academic and non-profit +# users may redistribute it freely. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 +# see included file GPL3.txt +# +# + + +###Dont forget you will need ... +##################################################################################################### +# Boost::Graph +#Copyright 2005 by David Burdick +# Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm +#Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself. +##################################################################################################### + + + +use strict; +use warnings; +use Boost::Graph; +use Getopt::Long; + + +my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n"; + +my $gff_file ; +my $min_inc = 5; +my $clus = 0.6; +my $buff = 0; +my $output_file; +my $temp; +my $mark; + +GetOptions( + + 'c=f' => \$clus, + 'm=i' => \$min_inc, + 'f|file=s' => \$gff_file, + 'b=i' => \$buff, + 'o=s' => \$output_file, + 't=s' => \$temp, + 'k=s' => \$mark +) ; + + +die $usage unless $gff_file; + + +my $starttime = time; +warn "started $starttime\n"; + +## load in data +my %molecules; # stores starts and ends of srnas +open GFF, "<$gff_file"; + +while (my $entry = ){ + + chomp $entry; + next if($entry=~/^\#/); + my @data = split(/\t/,$entry); + my $chr=shift @data; + my $strand=shift @data; + my $start=shift @data; + my $end=shift @data; +# my $length1=$end-$start+1; +# if ($length1>30) { +# $length1=40; +# } + my $total; + for (my $s=0;$s<@data ;$s++) { + $total+=$data[$s]; + } + push @data,$total; +# push @data,$length1; +# if (defined $molecules{$chr}{$start}{$end}{$strand}) { +# my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand}); +# for (my $i=0;$i<$#old_data ;$i++) { +# $data[$i]+=$old_data[$i]; +# } +# } + my $data=join ";",@data; + $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information + #print "$chr\t$start\t$end\n"; +} + +close GFF; + +warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n"; + +my @sample=split/\#/,$mark; +$mark=join"\"\t\"",@sample; +open OUT, ">$output_file"; +print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n"; +open CLUSTER,">$temp"; +print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n"; +foreach my $chromosome (keys %molecules){ + my $g = new Boost::Graph(directed=>0); + my @starts = keys(%{$molecules{$chromosome}} ); + @starts = sort {$a <=> $b} @starts; + + while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough + + + foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){ + + + ###use new graph if the next srna is too far away from this one.. + if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){ + + + ##dump the info from the old graph + if (scalar(@{$g->get_nodes()}) > 2){ + + my $cluster_coeff = get_cc($g); + if ($cluster_coeff >= $clus){ + dump_locus($g, $cluster_coeff); + } + } + + + $g = new Boost::Graph(directed=>0); + + } + + foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below + + unless ($e eq $srna_end){ + my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name + my $en = $chromosome. ':' . $srna_start . ':' . $e; + $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1'); + } + + } + + foreach my $start (@starts){ ##build graph of overlaps + my $new = 0; + last if $start - $min_inc > $srna_end; + if ($start - $min_inc <= $srna_end){ + + my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end; + foreach my $end (keys %{$molecules{$chromosome}{$start}}){ + + my $end_node = $chromosome . ':' . $start . ':' . $end; + $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1'); + } + + } + } + } + + if (!(defined $starts[0])) { + ##dump the info from the last graph + if (scalar(@{$g->get_nodes()}) > 2){ + + my $cluster_coeff = get_cc($g); + if ($cluster_coeff >= $clus){ + dump_locus($g, $cluster_coeff); + } + } + } + } +} + +warn "Loci printed\nFinished\n"; + +my $endtime = time; + +my $elapsed = $endtime - $starttime; + +warn "Time elapsed = $elapsed s\n"; +close OUT; +close CLUSTER; +######################################################################################### +sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2 + + my $graph = shift; + + my @component = @{$graph->get_nodes()}; #number of nodes + my @clustering_coefficients; + + foreach my $vertex (@component) + { + + my @neighbours = @{$graph->neighbors($vertex)}; + + my %edges_in_graph; + + my $n = @neighbours; #n = the number of neighbours + my $k = ($n * ($n - 1))/2; #k = total number of possible connections + + my $e= 0; #actual number of connections within sub-graph + + foreach my $neighbour (@neighbours) + { + foreach my $neighbour_2 (@neighbours) + { + my $edge1 = "$neighbour\t$neighbour_2"; + my $edge2 = "$neighbour_2\t$neighbour"; + unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2}) + { + if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour)) + { + ++$e; + $edges_in_graph{$edge1}=1; + $edges_in_graph{$edge2}=1; + } + } + } + } + + if ($k >= 1) + { + my $c = $e / $k; + push @clustering_coefficients, $c; + } + else {push @clustering_coefficients, '0';} + } + + my $graph_n = scalar(@clustering_coefficients); + my $graph_cc = 0; + foreach my $cc (@clustering_coefficients){ + + $graph_cc = $graph_cc + $cc; + + } + $graph_cc = $graph_cc / $graph_n; + + return $graph_cc; +} + +############################################################################################################ + +sub dump_locus{ + + my $g = shift; + my $cc = shift; + my $chr; + my $start = 1000000000000000000000000000000000000000000000; + my $end = -1; + my @sample; + my @tag; + foreach my $node (@{$g->get_nodes()}){ + + $node =~ m/^(\S+):(\d+):(\d+)$/; + my $c=$1; + my $s=$2; + my $e=$3; + # my @data; + foreach my $str (keys %{$molecules{$c}{$s}{$e}}) { + my @data=split(/;/,$molecules{$c}{$s}{$e}{$str}); + push @tag,($s.",".$e.",".$str.",".$data[-1]); +# for (my $i=0;$i<$#old_data ;$i++) { +# $data[$i]+=$old_data[$i]; +# } + my $length=$e-$s+1; + if ($length>30) { + $length=40; + } + push @data,$length; + my $data=join ";",@data;#sample_exp/total_exp/length; + push @sample,$data; + } + + $chr = $c; + $start = $s if $s < $start; + $end = $e if $e > $end; + } + my $tag=join";",@tag; + my $tag_number=@tag; + my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample); + if ($max_length==40) { + $max_length="\>30"; + } + my $cluster_exp=join"\t",@cluster_exp; + my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp; + print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n"; + print OUT $gff, "\n"; +} + +sub Max_length{ + my @exp=@{$_[0]}; + my %sample_length; + my $total_exp; + my @each; + for (my $i=0;$i<=$#exp ;$i++) { + my @tag=split/;/,$exp[$i]; + my $length=pop(@tag); + my $exp=pop(@tag); + $sample_length{$length}+=$exp; + $total_exp+=$exp; + for (my $j=0;$j<=$#tag ;$j++) { + $each[$j]+=$tag[$j]; + } + } + my $max=0; + my $max_key; + foreach my $key (sort keys %sample_length) { + my $p=$sample_length{$key}/$total_exp; + if ($p>$max) { + $max=$p; + $max_key=$key; + } + $sample_length{$key}=sprintf("%.2f",$p); + } + return($max_key,$sample_length{$max_key},@each); +} diff -r 2ed1e1728299 -r 9dcffd531c76 phased_siRNA.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phased_siRNA.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,254 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2013/7/19 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +#use Math::Cephes qw(:hypergeometrics); + +my %opts; +GetOptions(\%opts,"i=s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; + +open IN,"<$filein"; #input file +open OUT,">$fileout"; #output file + +while (my $aline=) { + chomp $aline; + if ($aline=~/^\#/) { + print OUT $aline,"\tp-value\n"; + next; + } + my @tmp=split/\t/,$aline; + my @pos=split/:|-/,$tmp[0]; + $tmp[1]=~s/nt//; + my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]); + + print OUT $aline,"\t",$pv,"\n"; +} +close IN; +close OUT; + +sub phase{ + my ($tagL,$start,$end,$tags)=@_; + my @tmp=split/\;/,$tags; + my %tag; + for (my $i=0;$i<@tmp;$i++) { + my @aa=split/\,/,$tmp[$i]; + next if($aa[1]-$aa[0]+1 != $tagL); +# $tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+"); +# $tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-"); + $tag{$aa[0]}+=$aa[3] if($aa[2] eq "+"); + $tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-"); + } + + my $pv=&pvalue2(\%tag,$tagL,$start,$end); + + return $pv; +} + +sub pvalue2{ + my ($tag,$tagL,$start,$end)=@_; + + my $p=1; my $pp=1; + foreach my $ccs(keys %{$tag}){ + my $n=0; + my $k=0; + my $K=0; + my $N=0; + + my $cor= $ccs; + my $ss=$cor; + my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end; + + my $max=0; + for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand + { + my $x=$i; + if (defined $$tag{$x}) + { + if ($max<$$tag{$x}) {$max=$$tag{$x};} + $n +=$$tag{$x}; + $N++; + } + } + for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand + { + my $x=$i; + if (defined $$tag{$x}) + { + $k +=$$tag{$x}; + $K++; + } + } + + + return $p if($K<3); + return $p if($max/$n>0.8); + + my $pn=0; + next if($n==$k); + $pn=10*$k/($n-$k)+1; + $pn = $pn ** ($K-2); + $pn = log($pn); + if ($p<$pn) { + $p=$pn; + } + + } + + return $p; + +} + +sub pvalue{ + my ($tag,$tagL,$start,$end)=@_; + + my $p=1; + foreach my $ccs(keys %{$tag}){ + my $n=-1; + my $k=-1; + + my ($cor, $str)=split(/,/, $ccs); + if ($str eq "+") # small RNAs on the Watson strand + { + my $ss=$cor; + my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end; + for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand + { + my $x=$i.","."+"; + if (defined $$tag{$x}) + { + $n=$n+1; + } + } + for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand + { + my $x=$i.","."+"; + if (defined $$tag{$x}) + { + $k=$k+1; + } + } + + for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand + { + my $x=$j.","."-"; + if (defined $$tag{$x}) + { + $n=$n+1; + } + } + + for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand + { + my $x=$j.","."-"; + if (defined $$tag{$x}) + { + $k=$k+1; + } + } + } + + elsif ($str eq "-") # small RNAs on the Crick strand + { + my $ee=$cor; + my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start; + for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand + { + my $x=$i.","."-"; + if (defined $$tag{$x}) + { + $n=$n+1; + } + } + for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand + { + my $x=$i.","."-"; + if (defined $$tag{$x}) + { + $k=$k+1; + } + } + + for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand + { + my $x=$j.","."+"; + if (defined $$tag{$x}) + { + $n=$n+1; + } + } + for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand + { + my $x=$j.","."+"; + if (defined $$tag{$x}) + { + $k=$k+1; + } + } + } + + next if($k<3); + + my $pn=0; my $N=$tagL*11*2-1; my $M=21; + for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k + { + my $c=1; + my $rr=1; + my $rw=1; + + for (my $j=0; $j<=$w-1; $j++) + { + $c=$c*($M-$j)/($j+1); + } + for (my $x=0; $x<=$n-$w-1; $x++) + { + $rr=$rr*($N-$M-$x)/($x+1); + } + for (my $y=0; $y<=$n-1; $y++) + { + $rw=$rw*($y+1)/($N-$y); + } + my $pr=$c*$rr*$rw; + + $pn=$pn+$pr; + } + + $p=$pn<$p ? $pn :$p; + + if ($p<0.001) #select and output small RNA clusters with p<0.001 + + { + + return $p; + + } + + } + return $p; +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 quantify.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/quantify.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,64 @@ +#!/usr/bin/perl -w +#Filename: +#Author: chentt +#Email: +#Date: 2012-4-6 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","o=s","d=s","h"); +if (!(defined $opts{i} and defined $opts{d} and defined $opts{o}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $input=$opts{'i'}; +my $output=$opts{'o'}; +my $depth=$opts{'d'}; + +open (IN,"<$input")||die"$!"; +open OUT,">$output"; +#my @Total=qw(15797079 18042650 17455254 17295526 18791753 16719596 15150009 18451484 17402501 17729362 19347595 17518516 15699663 16589265 15442892 14012264 14190746 17280260 13213117 12390121 14874304 ); +my @Total=split/\,/,$depth; +#print OUT "#clusterID\tmajor_length\tpercent\n"; +while (my $aline=) { + chomp $aline; + if ($aline=~/^\"/){ + my @title=split/\t/,$aline; + for (my $i=0;$i<@title ;$i++) { + $title[$i]=~s/^\"(\S+)\"$/$1/; + } + my $title=join "\t",@title; + print OUT "\#$title\n"; + next; + } + my @temp=split/\t/,$aline; + print OUT "$temp[0]\t$temp[1]\t$temp[2]"; + my @id=split/:/,$temp[0]; + my @posi=split/-/,$id[1]; + for (my $i=3;$i<@temp;$i++) { + my $rpkm=sprintf("%.2f",$temp[$i]/($posi[1]-$posi[0]+1)/$Total[$i-3]*1000000000); + print OUT "\t$rpkm"; + } + print OUT "\n"; +} +close IN; +close OUT; + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o -h +options: +-i input cluster file +-o output file +-d depth +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 rfam.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rfam.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,85 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2013/7/19 +#Modified: +#Description: +my $version=1.00; + +use strict; +use Getopt::Long; +use File::Basename; + +my %opts; +GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h"); +if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +unless ($fileout=~/\/$/) {$fileout.="/";} +my $rfam=$opts{'ref'}; +my $mis=defined $opts{'v'}? $opts{'v'} : 0; +my $index=defined $opts{'index'} ? $opts{'index'} : ""; +my $threads=defined $opts{'p'} ? $opts{'p'} : 1; + + +#my $time=time(); + +#my $mapdir=$fileout."/rfam_match_".$time; +my $mapdir=$fileout."/rfam_match"; +mkdir $mapdir; +chdir $mapdir; +###check genome index +if (-s $index.".1.ebwt") { +}else{ + &checkACGT($rfam); + `bowtie-build $rfam rfam`; + $index="rfam"; +} + +#chdir "rfam_match_1397022331"; +### genome mapping +`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`; + +sub checkACGT{ + my $string; + open IN,"<$rfam"; + while (my $aline=) { + if ($aline!~/^>/) { + $aline=~s/U/T/gi; + } + $string .=$aline; + } + close IN; + $rfam=basename($rfam); + open OUT, ">$rfam"; + print OUT $string; + close OUT; +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file# input reads fasta/fastq file +-ref input file# rfam file, which do not contain miRNAs +-index file-prefix #(must be indexed by bowtie-build) The parameter + string must be the prefix of the bowtie index. For instance, if + the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then + the prefix is 'h_sapiens_37_asm'.##can be null +-v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; + +-p/--threads number of alignment threads to launch (default: 1) + +-o output directory + +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 sRNA_plot.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sRNA_plot.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,411 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. +# 2. +# 3. +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h"); +if (!( defined $opt{o}) || defined $opt{h}) { +&usage; +} +my $span=$opt{span}; +#my $sample_cloumn=$opt{n}; +my $mark=$opt{mark}; +my @mark=split/\#/,$mark; +my $genelist=$opt{g}; +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + csv=>{ + 'stroke'=>"red", + 'stroke-width'=>0.5 + }, + exon=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + intron=>{ + 'stroke'=>"black", + 'stroke-width'=>1.5 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + #'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#############################s#define start coordinate and scale +open(TXT,">$opt{out}"); +open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; +my %length; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $length{$temp[0]}=$temp[1]; +} +close LENGTH; +#--------------------------------------------------------------- +open(GENE,"$opt{g}")||die"cannot open the file $opt{g}"; +my %genelist; +while (my $aline=) { + chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 + + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + if ($temp[1]=~/^Chr(\d)$/) { + $temp[1]="Chr0$1"; + } + push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]]; + +} +close GENE; +#my %have_gene; +#foreach my $chr (sort keys %genelist) { +# my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; +# my $start=$genelist[0][1]; +# my $end=$genelist[0][2]; +# for (my $i=0;$i<@genelist ;$i++) { +# if ($gene) { +# } +# } +#} + +my %gene_desity; +foreach my $chr (sort keys %genelist) { + my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; + for (my $i=0;$i<@genelist ;$i++) { + my $start=int($genelist[$i][1]/$span); + my $end=int($genelist[$i][2]/$span); + #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]}; + if ($start==$end) { + $gene_desity{$chr}[$start]++; + } + else{ + for (my $k=$start;$k<=$end ;$k++) { + $gene_desity{$chr}[$k]++; + } + } + } +} +#------------------------------------------region_gene_number------------------------- +my $max_gene_number=0; +my $total=0; +foreach my $chr (sort keys %genelist) { + for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) { + if (!(defined($gene_desity{$chr}[$i]))) { + $gene_desity{$chr}[$i]=0; + } + if ($gene_desity{$chr}[$i]>$max_gene_number) { + $max_gene_number=$gene_desity{$chr}[$i]; + #print "$gene_desity{$chr}[$i]\n"; + } + #print TXT "$i\t$gene_desity[$i]\n"; + $total+=$gene_desity{$chr}[$i]; + #print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; + } +} +#print "Gene max:$max_gene_number\ntotal:$total\n"; + +#--------------------------------------------------------------- +my %centromere; +if (defined($opt{cen})) { + open CEN,"$opt{cen}"; + while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $centromere{$temp[0]}[0]=$temp[1]; + $centromere{$temp[0]}[1]=$temp[2]; + } + close CEN; +} + +#--------------------------------------------------------------- +my $max_length=0; +foreach my $chr (keys %length) { + if ($max_length<$length{$chr}) { + $max_length=$length{$chr}; + } + print "$chr\n"; +} +#====================================cluster data======================================= +open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; +my %cluster; +my %cluster_density; +#my @sample=qw(39B3 3PA3 3LC3); +my @cluster_non_add; +while (my $aline=) { + next if($aline=~/^\#/); + chomp $aline;##Chr MajorLength Percent end 19B1 + my @temp=split/\t/,$aline; + my @ID=split/\:/,$temp[0]; + my @posi=split/\-/,$ID[1]; + my @all_rpkm=@temp; + shift @all_rpkm; + shift @all_rpkm; + shift @all_rpkm; +# for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer +# $all_rpkm[$s]=log2($all_rpkm[$s]); +# } + push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1); +} +close CLUSTER; +my %max_cluster; +my $chr_number=0; +print "@mark\n$mark\n"; +foreach my $chr (sort keys %cluster) { + for (my $i=0;$i<@mark ;$i++) { + $max_cluster{$chr}[$i]=0; + } + $chr_number++; +} +foreach my $chr (sort keys %cluster) { + @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + for (my $s=0;$s<@mark;$s++) { + if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) { + $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s]; + } + } + } + +} +foreach my $chr (sort keys %max_cluster) { + for (my $s=0; $s<@mark;$s++) { + # print "$max_cluster{$chr}[$s]\n"; + } +} +#--------------------------------------------------------------------------------------- +foreach my $chr(keys %cluster) { + for(my $i=0;$i<$#{$cluster{$chr}};$i++) { + my $start=int($cluster{$chr}[$i][1]/$span); + my $end=int($cluster{$chr}[$i][2]/$span); + if ($start==$end) { + for (my $s=0;$s<@mark ;$s++) { + $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s]; + } + + } + else{ + for (my $m=$start;$m<=$end ;$m++) { + for (my $s=0;$s<@mark ;$s++) { + $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s]; + } + } + } + } +} +my %max_cluster_density; +my $max_all_density=0; +foreach my $chr (sort keys %cluster) {# + for (my $s=0;$s<@mark ;$s++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + $max_cluster_density{$chr}[$s]=0; + } + } + +} +foreach my $chr (sort keys %cluster_density) { + print "$#{$cluster_density{$chr}}\n"; + for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { + print TXT "$chr\t$k"; + for (my $s=0;$s<@mark;$s++) { + if (!(defined($cluster_density{$chr}[$k][$s]))) { + $cluster_density{$chr}[$k][$s]=0; + } + if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) { + $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s]; + } + if ($cluster_density{$chr}[$k][$s]>$max_all_density) { + $max_all_density=$cluster_density{$chr}[$k][$s]; + } + print TXT "\t$cluster_density{$chr}[$k][$s]"; + } + print TXT "\n"; + } +} +print "max density: $max_all_density\n"; +#-------------------------------------------------------------------- +my $top_margin=30; +my $tail_margin=30; +my $XOFFSET=50; +my $YOFFSET=60; +my $chr_length=600; +my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 +#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 +#========================================New canvas============================ +#### Starting #### +#新建画布 +my $width=1000; +my $heigth=100+130*$chr_number; +my $svg=SVG->new(width=>$width, height=>$heigth); +#画图起始点 +my $canvas_start_x=$XOFFSET; +my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线 +my $canvas_start_y=$YOFFSET; +my $canvas_end_y=$YOFFSET; +my $chr_heigth=$heigth-$YOFFSET-$tail_margin; +print "chr number:$chr_number\n"; +my $one_chr_heigth=$chr_heigth/$chr_number; +my $Yscale=($one_chr_heigth-15)/$max_all_density; +#my $chr_width=$YOFFSET; +#my $chr_start_y; +#my $chr_end_y; +#my $Yscale=0.01; +#=======================================title of the graph=============================== +#my $span_k=$span/1000; +#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution"); +#=======================================the top max chr line============================= +$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); +$long_scale=int ($max_length/10);#十等分 大刻度 +#大坐标刻度 +for ($i=0;$i<=10;$i++) { + my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale; + my $long_x_end=$long_x_start; + my $long_y_start=$YOFFSET; + my $long_y_end=$YOFFSET-5; + $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + my $Bscale=$long_scale*$i; + my $cdata=int ($Bscale/1000000); + $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M"); +} +#========================================================================================= +my $cc=1; +foreach my $chr (sort keys %length) { + my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; + my $chr_start_x=$XOFFSET; + my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth; + my $chr_end_y=$chr_start_y; + #$chr_start_y+=$chr_width; + #$chr_end_y+=$chr_width; +# for (my $i=0;$i<@{$gene_desity{$chr}};$i++) { +# print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; +# my $red=$gene_desity{$chr}[$i]/$max_gene_number*255; +# my $green=$gene_desity{$chr}[$i]/$max_gene_number*255; +# print "$red\t$green\t0\n"; +# $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)"); +# } + + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); + my $m_length=$length{$chr}%1000000; + $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M"); + + + if (defined($centromere{$chr}[0])) { + $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + } + for (my $s=0;$s<@mark ;$s++) { + for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) { + #if ($cluster_density{$chr}[$i]*$Yscale>40) { + #$cluster_density{$chr}[$i]=40/$Yscale; + #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + #} + #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n"; + my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale; + my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale; + my $c_red=($s+1)/@mark*255; + $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3); + } + + } + #=======Y axis + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + #=======Y axis ===>3 xiaoge + my $s10=1; + my $e10=0; + my $chr_max=$max_all_density; + while ($chr_max>10) { + $chr_max=int($chr_max/10); + $s10=$s10*10; + $e10++; + } + $chr_max=$chr_max/2; + #print "*****$max_all_density\t$chr_max\t$s10\n"; + for (my $i=1;$i<3 ;$i++) { + my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i; + my $xiaoge_Y=$chr_max*$i; + $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10); + } + $cc++; +} + +for (my $s=0;$s<@mark ;$s++) { + my $c_red=($s+1)/@mark*255; + print "**$c_red\n"; + $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1); + $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]); +} +# +# +if (defined($opt{cen})) { + $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); +} + +close TXT; + +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub log2 { + my $n = shift; + return log($n)/log(2); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-g genelist +-span +-n sample cloumn +-mark sample name +-o output graph file name with html or svg extension +-c cluster file input +-out txt output +-l length of chr +-cen centromere +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 sRNA_rpkm_distribution_along_genome.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sRNA_rpkm_distribution_along_genome.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,264 @@ +#!/usr/bin/perl -w +#========================================================================================== +# Date: +# Title: +# Comment: Program to plot gene structure +# Input: 1. +# 2. +# 3. +# Output: output file of gene structure graph by html or svg formt +# Test Usage: +#======================================================================================== +#use strict; +my $version=1.00; +use SVG; +use Getopt::Long; +my %opt; +GetOptions(\%opt,"span=s","c=s","o=s","out=s","l=s","cen:s","n=s","mark=s","h"); +if (!( defined $opt{o}) || defined $opt{h}) { +&usage; +} +my $span=$opt{span}; +my $sample_cloumn=$opt{n}; +my $mark=$opt{mark}; +#===============================Define Attribute========================================== +my %attribute=( + canvas=>{ + 'width'=>1500, + 'height'=>1800 + }, + text=>{ + 'stroke'=>"#000000", + 'fill'=>"none", + 'stroke-width'=>0.5 + }, + line=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + csv=>{ + 'stroke'=>"red", + 'stroke-width'=>0.5 + }, + exon=>{ + 'stroke'=>"black", + 'stroke-width'=>1 + }, + intron=>{ + 'stroke'=>"black", + 'stroke-width'=>1.5 + }, + font=>{ + 'fill'=>"#000000", + 'font-size'=>12, + 'font-size2'=>10, + #'font-weight'=>'bold', + 'font-family'=>"Arial" + #'font-family'=>"ArialNarrow-bold" + }, + rect=>{ + 'fill'=>"lightgreen", + 'stroke'=>"black", + 'stroke-width'=>0.5 + }, + readwidth=>0.5 +); +#############################s#define start coordinate and scale +open(TXT,">$opt{out}"); +open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; +my %length; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $length{$temp[0]}=$temp[1]; +} +close LENGTH; +#--------------------------------------------------------------- +my %centromere; +if (defined($opt{cen})) { + open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}"; + while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + my @temp=split/\t/,$aline; + $temp[0]=~s/^c/C/; + $centromere{$temp[0]}[0]=$temp[1]; + $centromere{$temp[0]}[1]=$temp[2]; + } + close CEN; +} + +#--------------------------------------------------------------- +my $max_length=0; +foreach my $chr (keys %length) { + if ($max_length<$length{$chr}) { + $max_length=$length{$chr}; + } + print "$chr\n"; +} +#====================================cluster data======================================= +open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; +my %cluster; +my %cluster_density; +#my @sample=qw(39B3 3PA3 3LC3); +my @cluster_non_add; +while (my $aline=) { + next if($aline=~/^\#/); + chomp $aline;##ID chr strand start end 19B1 + my @temp=split/\t/,$aline; + my @ID=split/\:/,$temp[0]; + my @posi=split/\-/,$ID[1]; + push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],$temp[2+$sample_cloumn]];#ID start end rpkm(19B1,1PA1,1LC1); +} +close CLUSTER; +my %max_cluster; +foreach my $chr (sort keys %cluster) { +# for (my $i=0;$i<3 ;$i++) { +# $max_cluster{$chr}[$i]=0; +# } + $max_cluster{$chr}=0 +} +foreach my $chr (sort keys %cluster) { + @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; + #for (my $s=0;$s<3;$s++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + if ($cluster{$chr}[$i][3]>$max_cluster{$chr}) { + $max_cluster{$chr}=$cluster{$chr}[$i][3]; + } + } + #} + +} +#--------------------------------------------------------------------------------------- +foreach my $chr(keys %cluster) { + for(my $i=0;$i<$#{$cluster{$chr}};$i++) { + my $start=int($cluster{$chr}[$i][1]/$span); + my $end=int($cluster{$chr}[$i][2]/$span); + if ($start==$end) { + #for (my $j=0;$j<3 ;$j++) { + $cluster_density{$chr}[$start]+=$cluster{$chr}[$i][3]; + #} + + } + else{ + for (my $m=$start;$m<=$end ;$m++) { + #for (my $j=0;$j<3 ;$j++) { + $cluster_density{$chr}[$m]+=$cluster{$chr}[$i][3]; + #} + } + } + } +} +my %max_cluster_density; +foreach my $chr (sort keys %cluster) {# + #for (my $i=0;$i<3 ;$i++) { + for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { + $max_cluster_density{$chr}=0; + } + #} +} +foreach my $chr (sort keys %cluster) { + #for (my $i=0;$i<3;$i++) { + for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { + if (!(defined($cluster_density{$chr}[$k]))) { + $cluster_density{$chr}[$k]=0; + } + if ($cluster_density{$chr}[$k]>$max_cluster_density{$chr}) { + $max_cluster_density{$chr}=$cluster_density{$chr}[$k]; + } + print TXT "$chr\t$k\t$cluster_density{$chr}[$k]\n"; + } + #} +} +#-------------------------------------------------------------------- +my $XOFFSET=50; +my $YOFFSET=60; +#my $length=$end-$start+1; +my $Xscale=600/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 +#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 +#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 +#========================================New canvas============================ +#### Starting #### +#新建画布 +my $svg=SVG->new(); +#画图起始点 +my $canvas_start_x=$XOFFSET; +my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺 画线 +my $canvas_start_y=$YOFFSET; +my $canvas_end_y=$YOFFSET; + +my $chr_width=$YOFFSET; +my $chr_start_y; +my $chr_end_y; +my $Yscale=0.01; +foreach my $chr (sort keys %length) { + my $chr_start_x=$XOFFSET; + my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; + $chr_start_y+=$chr_width; + $chr_end_y+=$chr_width; + $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); + $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); + $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$length{$chr}); + + + if (defined($centromere{$chr}[0])) { + $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + } + for (my $i=0;$i<$#{$cluster_density{$chr}} ;$i++) { + if ($cluster_density{$chr}[$i]*$Yscale>40) { + $cluster_density{$chr}[$i]=40/$Yscale; + $svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); + } + my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; + my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; + my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i]*$Yscale; + #my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][0]*$Yscale; + #$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'}); + $svg->rect('x',$cluster_density_start_x,'y',$chr_start_y-$cluster_density{$chr}[$i]*$Yscale,'width',$span*$Xscale,'height',$cluster_density{$chr}[$i]*$Yscale,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); + } + + $chr_width=50; + + #$svg->rect('x',$c_non_add_start_x,'y',$c_non_add_start_y,'width',$cluster_non_add_width,'height',$cluster_non_add_heigth,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); +} + +my $span_k=$span/1000; +$svg->text('x',200,'y',$chr_start_y+20,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"$mark sRNA rpmk \/ $span_k kb"); + +$svg->rect('x',600,'y',500,'width',10,'height',10,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); +$svg->text('x',620,'y',510,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"sRNA rpkm"); + +if (defined($opt{cen})) { + $svg->rect('x',600,'y',520,'width',10,'height',10,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); + $svg->text('x',620,'y',530,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); +} + +close TXT; +open (OUT,">$opt{o}"); +print OUT $svg->xmlify(); + +sub log2 { + my $n = shift; + return log($n)/log(2); +} + +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 +options: +-span +-n sample cloumn +-mark sample name +-o output graph file name with html or svg extension +-c cluster file input +-out txt output +-l length of chr +-cen centromere +-h help +USAGE +exit(1); +} \ No newline at end of file diff -r 2ed1e1728299 -r 9dcffd531c76 sam2Bed_bowtie.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam2Bed_bowtie.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,74 @@ +#!/usr/bin/perl -w +#Filename: +#Author: Tian Dongmei +#Email: tiandm@big.ac.cn +#Date: 2011/11/7 +#Modified: +#Description: sam2BED +my $version=1.00; + +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts,"i=s","mark=s","o=s","h"); +if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments +&usage; +} + +my $filein=$opts{'i'}; +my $fileout=$opts{'o'}; +my $mark=$opts{'mark'}; +my @sample=split/\#/,$mark; +$mark=join"\t",@sample; +open OUT,">$fileout"; #output file +print OUT "#chr\tstrand\tstart\tend\t$mark\n"; + +open IN,"<$filein"; #input file +my $Tags_num=0; +my @read_num; +#print OUT "#chr\tstart\tend\tnum\t<=20\t21\t22\t23\t24\t>=25\n"; +while (my $aline=) { + chomp $aline; + next if($aline=~/^\@/); + my @tmp=split/\t/,$aline; + my $strand=$tmp[1]; + my $start=$tmp[3]+1; + my $length=length($tmp[4]); + my $end=$start+$length-1; + my $hit=$tmp[6]+1; + #======express caculate weighted=================================== + my $exp; + my @tempID=split/\:/,$tmp[0]; + my @exp=split/\_/,$tempID[1]; + pop @exp; + for (my $j=0;$j<@exp ;$j++) { + #my @tempID1=split/\=/,$tempID[$j]; + $exp[$j]=sprintf("%.2f",$exp[$j]/$hit); + $read_num[$j]+=$exp[$j]; + #print OUT "\t$exp"; + } + $exp=join "\t",@exp; + print OUT $tmp[2],"\t",$strand,"\t",$start,"\t",$end,"\t",$exp,"\n"; + $Tags_num++; + +} +print "Total Tags numer: $Tags_num\n"; +my $read_number=join "\t",@read_num; +print "Each sample numer: $read_number\n"; +close IN; +close OUT; +sub usage{ +print <<"USAGE"; +Version $version +Usage: +$0 -i -o +options: +-i input file +-mark sampleA sampleB sampleC..... +-o output file +-h help +USAGE +exit(1); +} + diff -r 2ed1e1728299 -r 9dcffd531c76 siRNA.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/siRNA.pl Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,521 @@ +#!/usr/bin/perl -w +my $version=1.00; +use strict; +use warnings; +use Getopt::Long; +use Getopt::Std; +use threads; +use threads::shared; +use Parallel::ForkManager; +use lib '/leofs/biotrans/chentt/perl_module/'; +#perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq +print " +##################################### +# # +# sRNA cluster # +# # +##################################### +"; +########################################################################################### +my $usage="$0 +Options: +-i input file# raw data file +-tag string #raw data sample name +-g genome file +-f gff file + +-o workdir file +-path script path +-t int, number of threads [1] +-format fastq, fq, fasta or fa +-idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter + string must be the prefix of the bowtie index. For instance, if + the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then + the prefix is 'h_sapiens_37_asm'.##can be null +-mis int number of allowed mismatches when mapping reads to genome, default 0 +-rfam string, input file# rfam database file. +-idx2 string, rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter + string must be the prefix of the bowtie index. For instance, if + the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then + the prefix is 'h_sapiens_37_asm'.##can be null + +-v int report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment + +-a string, ADAPTER string. default is ATCTCGTATG. +-n int max hits number,default 25; used in genome alignment +-d int distance of tag to merged a cluster; default 100 +-p cluster method F :conventional default is F + T :NIBLES +-l int the length of the upstream and downstream,default 1000;used in position annotate + +-nat natural antisense transcripts file +-repeat repeat information file out of Repeatmasker +-deg file config of de sample +-cen centromere file input +-span plot span, default 50000 +"; + +my %options; +GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h"); +#print help if that option is used +if($options{h}){die $usage;} + +my @filein=@{$options{'i'}}; +my @mark=@{$options{'tag'}}; + +#my $config=$options{'i'}; +my $genome_fa=$options{'g'}; +my $gff=$options{'f'}; + + +########################################################################################## +my $predir=`pwd`; +chomp $predir; +my $workdir=defined($options{'o'}) ? $options{'o'}:$predir; + +my $path=$options{'path'}; + +my $t=defined($options{'t'})? $options{'t'}:1; #threads number + +my $mis=defined $options{'mis'} ? $options{'mis'}:0; + +my $mis_rfam=defined $options{'v'} ? $options{'v'}:0; + +my $hit=defined $options{'n'}?$options{'n'}:25; + +my $distance_of_merged_tag=defined $options{'d'} ? $options{'d'}:100; + +my $up_down_dis=defined $options{'l'} ?$options{'l'}:1000; + +my $cluster_mothod=defined $options{'p'}?$options{'p'}:"F"; + +my $format=$options{'format'}; +#if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { +# die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; +#} + +my $adpter="ATCTCGTATG"; #adapter +if (defined $options{'a'}) {$adpter=$options{'a'};} + + +my $phred_qv=64; +my $sample_number; +my ($dir,$dir_tmp); +################################ MAIN ################################################## +print "\ncluster program start:"; +my $time=Time(); +make_dir_tmp(); + +my @clip; +my $mark; +my $sample_mark; + +my $config=$dir."/input_config"; +open CONFIG,">$config"; + for (my $i=0;$i<@filein;$i++) { + print CONFIG $filein[$i],"\t",$mark[$i],"\n"; + } +close CONFIG; +if (@filein != @mark) { + die "Maybe config file have some wrong!!!\n"; +} +$sample_number=@mark; +$mark=join "\t",@mark; +$sample_mark=join "\#",@mark; + + +#read_config(); + +trim_adapter_and_filter(); + +my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data +my $data2=$filter_out; ### mirbase not mapped reads +my $data3=$dir."\/rfam_match\/rfam_not_mapped\.fa"; ### rfam not mapped reads +my $bed=$dir."cluster\/"."sample\.bed"; +my $read=$dir."cluster\/"."sample_reads\.cluster"; +my $read_txt=$dir."cluster\/"."cluster\.txt"; +my $rpkm=$dir."cluster\/"."sample_rpkm\.cluster"; +my $preprocess; +my $cluster_file; +my $annotate_dir; +my $deg_dir; +my $plot_dir; +my %id; +for (my $i=0;$i<@mark ;$i++) { + $id{$mark[$i]}=$i+4; +} + +print "\n######## tiandm test start ###########\n"; +print "\@mark: @mark\n\%id keys number:"; +print scalar keys %id; +print "\n"; +foreach my $kyess (keys %id){ + print $kyess," --> $id{$kyess}\n"; +} +print "\n######## tiandm test end ############\n"; +group_and_filter(); #collapse reads to tags + +rfam(); + +my @map_read; +my $map_tag=0; +genome(); + +bwt2bed(); + +cluster(); + +quantify(); + +phase(); + +if (defined($options{'nat'})&&defined($options{'repeat'})) { + class(); +} +else{ + get_genelist(); +} + +annotate(); + +genome_length(); + +plot(); + +my @pairdir; +if (defined($options{'deg'})) { + dec(); + infor_merge(); +} +else{infor_merge_no_dec()} +html(); +print "\ncluster program end:"; +Time(); +############################sub program################################################### +sub make_dir_tmp{ + + #make temporary directory + if(not -d "$workdir\/cluster_runs"){ + mkdir("$workdir\/cluster_runs"); + mkdir("$workdir\/cluster_runs\/ref\/"); + } + + $dir="$workdir\/cluster_runs\/"; + #print STDERR "mkdir $dir\n\n"; + return; +} + +#sub read_config{ +# open IN,"<$config"; +# while (my $aline=) { +# chomp $aline; +# my @tmp=split/\t/,$aline; +# push @filein,$tmp[0]; +# push @mark,$tmp[1]; +# } +# close IN; +# if (@filein != @mark) { +# die "Maybe config file have some wrong!!!\n"; +# } +# $sample_number=@mark; +# $mark=join "\t",@mark; +# $sample_mark=join "\#",@mark; +#} + + +sub trim_adapter_and_filter{ + my $time=time(); + $preprocess=$dir."preProcess/"; + mkdir $preprocess; + my $can_use_threads = eval 'use threads; 1'; + if ($can_use_threads) { + # Do processing using threads + my @filein1=@filein; my @mark1=@mark; + while (@filein1>0) { + my @thrs; my @res; + for (my $i=0;$i<$t ;$i++) { + last if(@filein1==0); + my $in=shift @filein1; + my $out=shift @mark1; + push @clip,$dir."preProcess\/$out\_clip\.fq"; + $thrs[$i]=threads->create(\&clips,$in,$out); + } + for (my $i=0;$i<@thrs;$i++) { + $res[$i]=$thrs[$i]->join(); + } + } + } + else { +# Do not processing using threads + for (my $i=0;$i<@filein ;$i++) { + my $in=$filein[$i]; + my $out=$mark[$i]; + push @clip,$dir."preProcess\/$out\_clip\.fq"; + &clips($in,$out); + } + } +} + +sub clips{ + my ($filein,$fileout)=@_; + my $adapter=$dir."preProcess\/$fileout\_clip\.fq"; + if($format eq "fq" || $format eq "fastq"){ + my $clip=`fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`; + } + if($format eq "fa" || $format eq "fasta"){ + my $clip=`fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`; + } + #my $clean=$dir."preProcess\/$fileout\_clean.fq"; + #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `; + return $fileout; +} + +sub group_and_filter{ + #my ($ins,$data)=@_; + my @ins=@clip; + my $str=""; + my $group_out_file=$dir."preProcess\/"."collapse_reads.fa"; + #print "$$ins[0]\t$$ins[0]\n"; + for (my $i=0;$i<@clip;$i++) { + $str .="-i $clip[$i] "; + #print "$$ins[$i]\n"; + } + my $group=`perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format`; + print "perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format\n\n"; + + my $l_out=$dir."preProcess\/"."collapse_reads_18-40.fa"; + my $length_f=`perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark`; + print "perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark\n\n"; + my $cout_f=`perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark`; + print "perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark\n\n"; + my $plot_l_D=`perl $path/Length_Distibution.pl -i $dir/preProcess/reads_length_distribution_after_count_filter.txt -o $dir/preProcess/length.html `; + print "perl $path\/Length_Distibution.pl -i $dir\/preProcess\/reads_length_distribution_after_count_filter.txt -o $dir\/preProcess\/length\.html\n\n"; + return 0; +} + +sub rfam{ + if (defined $options{'idx2'}) { + system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir -index $options{idx2}"); + }else{ + system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir"); + } + my $tag=join "\\;" ,@mark; + my $rfam_count=`perl $path\/count_rfam_express.pl -i $dir\/rfam_match\/rfam_mapped.bwt -tag $tag -o $dir\/rfam_match\/rfam_non-miRNA_annotation.txt`; + return 0; +} +sub genome{ + if(defined $options{'idx'}){ + system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir -index $options{idx}") ; + }else{ + system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir ") ; + } + #=================== mapping sta =================================================== + my $map_file=$dir."genome_match\/genome_mapped\.fa"; + open (MAP,"<$map_file")||die"$!"; + print "\n#each sample mapping reads sta:\n\n"; + print "#$mark\ttotal\n"; + while (my $ID=) { + chomp $ID; + my @tmp=split/\:/,$ID; + my @exp=split/\_/,$tmp[1]; + $exp[-1] =~ s/^x//; + for (my $i=0;$i<@exp ;$i++) { + $map_read[$i]+=$exp[$i]; + } + $map_tag++; + my $seq=; + } + my $map_read=join"\t",@map_read; + print "$map_read\n\n"; + print "#total mapped tags:$map_read\n\n"; + close MAP; + return 0; +} + +sub bwt2bed{ + $cluster_file=$dir."cluster\/"; + mkdir ("$cluster_file"); + print "sam file changed to bed file\n"; + my ($file) = $dir."genome_match\/genome_mapped\.bwt"; + + my $sam2bed=`perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed `; + print "perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed\n\n"; + return 0; +} + +sub cluster{ + print "tags is ready to merged clusters\n\n"; + my ($file) =$bed; + if ($cluster_mothod eq "F") { + my $cluster=`perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt`; + print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n"; + } + elsif($cluster_mothod eq "T"){ + my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`; + print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n"; + } + else{print "\-p is wrong!\n\n";} + return 0; +} + + +sub quantify{ + print "clusters is ready to quantified\n\n"; + my @depth=@map_read; + pop @depth; + my $depth=join ",",@depth; + my $quantify=`perl $path\/quantify.pl -i $read -d $depth -o $rpkm`; + print "perl $path\/quantify.pl -i $read -d $depth -o $rpkm\n\n\n"; + return 0; +} + +sub phase{ + $annotate_dir=$dir."annotate\/"; + mkdir ("$annotate_dir"); + print "clusters is to predict phase siRNA\n"; + my $phase=`perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out`; + print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n"; + return 0; +} + +sub class{ + print "clusters is ready to annotate by sources\n\n"; + my $nat=$options{'nat'}; + my $repeat=$options{'repeat'}; + my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`; + print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n"; +} + +sub annotate{ + print "clusters is ready to annotate by gff file\n\n"; + my $file; + if (defined($options{'nat'})&&defined($options{'repeat'})) { + $file="$annotate_dir\/sample_class.anno"; + } + else{ + $file=$rpkm; + } + my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`; + print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n"; + return 0; +} +sub get_genelist{ + + my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`; + print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt"; +} + +sub dec{ + print "deg reading\n\n"; + my $deg_file=$options{'deg'}; + open IN,"<$deg_file"; + my @deg; + my $s=0; + while (my $aline=) { + chomp $aline; + next if($aline=~/^\#/); + $deg[$s]=$aline; + my @ea=split/\s+/,$aline; + push @pairdir,"$ea[0]_VS_$ea[1]\/"; + #print "$deg[$s]\n"; + $s++; + } + close IN; + $deg_dir=$dir."deg\/"; + mkdir ("$deg_dir"); + my $max_process = 10; + my $pm = new Parallel::ForkManager( $max_process ); + my $number=@deg-1; + foreach(0..$number){ + $pm->start and next; + &dec_pel($deg[$_]); + $pm->finish; + } + $pm->wait_all_children; +} + +sub dec_pel{ + print "\n******************\nstart:\n"; + Time(); + my $sample=shift(@_); + my @each=split/\s+/,$sample; + print "$each[0]\t$each[1]\n"; + my $deg_sample_dir=$deg_dir."$each[0]_VS_$each[1]\/"; + mkdir ("$deg_sample_dir"); + print "read: $read\n"; + print "deg_sample_dir: $deg_sample_dir\n"; + print "$id{$each[0]}\t$each[0]\n"; + print "$id{$each[1]}\t$each[1]\n"; + my $deg=`perl $path\/DEGseq_2.pl -i $read -outdir $deg_sample_dir -column1 $id{$each[0]} -mark1 $each[0] -column2 $id{$each[1]} -mark2 $each[1]`; #-depth1 -depth2 + my $time2=time(); + print "end:\n*************************\n"; + Time(); + sleep 1; +} + +sub infor_merge{ + my ($input,$mark); + foreach (@pairdir) { + print "@pairdir\n"; + $mark.=" -mark $_ "; + $input.=" -i $dir/deg\/$_\/output_score\.txt "; + print "$input\n$mark\n"; + } + my $infor_merge=`perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result `; + print "perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result\n\n"; +} + +sub infor_merge_no_dec{ + my $infor_merge_no_dec=`cp $annotate_dir\/sample_c_p.anno $dir\/total.result`; +} + +sub genome_length{ + my $length=`perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length`; + print "perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length\n\n" + +} + +sub plot{ + $plot_dir="$dir\/plot\/"; + mkdir ("$plot_dir"); + my $span=defined($options{span})?$options{span}:50000; + my $cen=""; + if (defined $options{cen}) { + $cen="-cen $options{cen}"; + } + my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `; + "print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n"; + +} + +sub html{ + my $pathfile="$dir/path.txt"; + open PA,">$pathfile"; + print PA "$config\n"; + print PA "$preprocess\n"; + print PA "$dir"."rfam_match\n"; + print PA "$dir"."genome_match\n"; + print PA "$cluster_file\n"; + print PA "$annotate_dir\n"; + print PA "$plot_dir\n"; + if (defined($deg_dir)) { + print PA "$deg_dir\n"; + } + close PA; + my $html=`perl $path\/html.pl -i $pathfile -format $format -o $dir/result.html`; +} + +sub Time{ + my $time=time(); + my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; + $month++; + $year+=1900; + if (length($sec) == 1) {$sec = "0"."$sec";} + if (length($min) == 1) {$min = "0"."$min";} + if (length($hour) == 1) {$hour = "0"."$hour";} + if (length($day) == 1) {$day = "0"."$day";} + if (length($month) == 1) {$month = "0"."$month";} + print "$year-$month-$day $hour:$min:$sec\n"; + return("$year-$month-$day-$hour-$min-$sec"); +} +################################################################################# diff -r 2ed1e1728299 -r 9dcffd531c76 siRNA.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/siRNA.xml Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,106 @@ + + tool for plant siRNA analisis + + + SCRIPT_PATH + bowtie + R + degseq + fastx_toolkit + threads + Parallel-ForkManager + SVG + Boost-Graph + + + siRNA.pl + ## Change this to accommodate the number of threads you have available. + -t \${GALAXY_SLOTS:-4} + + -path \$SCRIPT_PATH + + #for $j, $s in enumerate( $series ) + ##rank_of_series=$j + -i ${s.input} + -tag ${s.tag} + #end for + + ## Do or not annotate siRNAs by function + #if $params.function_anno == "yes": + -nat $params.nat -repeat $params.repeat + #end if + + ## Do or not DEG + #if $degseq.degseq_analysis == "yes" : + -deg $degseq.deg + #end if + + -format $format -g $genome -f $gff -mis $mis -rfam $rfam -v $v -a $a -n $mapnt -d $d -p $p -l $l -cen $cen -span $span > run.log + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 2ed1e1728299 -r 9dcffd531c76 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Nov 05 21:09:35 2014 -0500 @@ -0,0 +1,86 @@ + + + + + + + + + + $REPOSITORY_INSTALL_DIR + + + + + + + + + + + + + + R CMD BATCH $REPOSITORY_INSTALL_DIR/install_DEG.R + + + + + + + + http://www.cpan.org/authors/id/J/JD/JDHEDDEN/threads-1.96.tar.gz + perl Makefile.PL PREFIX=$INSTALL_DIR + make + make install + + $INSTALL_DIR/lib + + + + + + + + + + http://www.cpan.org/authors/id/S/SZ/SZABGAB/Parallel-ForkManager-1.06.tar.gz + perl Makefile.PL PREFIX=$INSTALL_DIR + make + make install + + $INSTALL_DIR/lib + + + + + + + + + http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz + perl Makefile.PL PREFIX=$INSTALL_DIR + make + make install + + $INSTALL_DIR/lib + + + + + + + + + http://www.cpan.org/authors/id/D/DB/DBURDICK/BoostGraph/Boost-Graph-1.4.tar.gz + perl Makefile.PL PREFIX=$INSTALL_DIR + make + make install + + $INSTALL_DIR/lib + + + + + +