# HG changeset patch # User big-tiandm # Date 1415239847 18000 # Node ID b6686462d0cba52169af19c9fa6c95cf303b07d3 # Parent 9dcffd531c763a90351fba1f8d8e33f3e5a74017 Deleted selected files diff -r 9dcffd531c76 -r b6686462d0cb Annotate.pl --- a/Annotate.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,178 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Chentt -#Email: chentt@big.ac.cn -#Date: 2014/4/10 -#Modified: -#Description: cluster annotate by priority -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","d=i","g=s","o=s","t=s","h"); -if (!(defined $opts{i} and defined $opts{g} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} -#my $genelistout=$opts{'t'}; -my $dis=defined $opts{'d'}? $opts{'d'}:1000; -my %gene; - -#open OUT,">$genelistout"; #output file -#print OUT "#ID\tchr\tstart\tend\tstrand\ns"; -open IN,"<$opts{g}"; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @tmp=split/\t/,$aline;#ID chr start end strand - #push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; - $gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[4]]; -} -#while (my $aline=) { -# chomp $aline; -# next if($aline=~/^\#/); -# my @tmp=split/\t/,$aline; -# my $ID; -# if ($tmp[2] eq "gene") { -# $tmp[0]=~s/Chr\./Chr/; -# $tmp[0]=~s/Chr/chr/; -# my @infor=split/;/,$tmp[8]; -# for (my $i=0;$i<@infor ;$i++) { -# if ($infor[$i]=~/Alias\=(\S+)$/) { -# $ID=$1; -# last; -# } -# } -# $gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] -# print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; -# } -#} -close IN; -#close OUT; - - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; - -open IN,"<$filein"; #input file -open OUT,">$fileout"; #output file -while (my $aline=) { - chomp $aline; - my @tmp=split/\t/,$aline; - if($aline=~/^\#/){print OUT "$aline\tP_annotate\n";next} - my @result; - #shift @tmp; - my @id=split/:/,$tmp[0]; - $id[0]=~s/Chr0/Chr/; - my @posi=split/-/,$id[1]; - foreach my $key (keys %{$gene{$id[0]}}) { - if ($posi[0]<$gene{$id[0]}{$key}[1] && $posi[1]>$gene{$id[0]}{$key}[0]) { - push @result,"gene-body;$key;$gene{$id[0]}{$key}[2]";#$te{$key}"; - next; - } - #if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-1000) { - if ($posi[0]<$gene{$id[0]}{$key}[0] && $posi[1]>$gene{$id[0]}{$key}[0]-$dis) { - push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); - push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); - next; - } - #if ($posi[0]<$gene{$id[0]}{$key}[1]+1000 && $posi[1]>$gene{$id[0]}{$key}[1]) { - if ($posi[0]<$gene{$id[0]}{$key}[1]+$dis && $posi[1]>$gene{$id[0]}{$key}[1]) { - push @result,"down1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "+"); - push @result,"up1-kb;$key;$gene{$id[0]}{$key}[2]" if($gene{$id[0]}{$key}[2] eq "-"); - next; - } - } - my $result; - if (!(@result)) { - $result="intergenic"; - } - elsif($#result==0){ - $result=$result[0]; - - } - else{ - $result=join "\t",@result; - } -# else{ -# my $te_num=0; -# my @te_overlap; -# my @te_up_down; -# my @non_overlap; -# my @non_up_down; -# for (my $k=0;$k<@result ;$k++) { -# my @rr=split/\;/,$result[$k]; -# if ($rr[3] eq "Y") { -# $te_num++; -# if ($rr[0] eq "overlap") { -# push @te_overlap,$result[$k]; -# } -# else{ -# push @te_up_down,$result[$k]; -# } -# } -# else{ -# if ($rr[0] eq "overlap") { -# push @non_overlap,$result[$k]; -# } -# else{ -# push @non_up_down,$result[$k]; -# } -# } -# } -# if ($te_num==0) {#non TE -# if (!(@te_overlap)) {#down up -# if ($#non_up_down==0) { -# $result=$non_up_down[0]; -# } -# else{#overlap -# my $all_2=join "\t",@non_up_down; -# $result="up&down1-kb\t".$all_2; -# } -# } -# else{ -# $result=join "\t",@non_overlap; -# if ($#non_overlap>=1) { -# print "$aline\t$result\n"; -# } -# } -# } -# else{#TE -# if (!(@te_overlap)) {#down up -# if ($#te_up_down==0) { -# $result=$te_up_down[0]; -# } -# else{#overlap -# my $all_2=join "\t",@te_up_down; -# $result="up&down1-kb\t".$all_2; -# } -# } -# else{ -# $result=join "\t",@te_overlap; -# if ($#te_overlap>=1) { -# print "$aline\t$result\n"; -# } -# } -# } -# } - print OUT "$aline\t$result\n"; -} - -close IN; -close OUT; -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -g -d -options: --i input file --g genelist file --d int the length of the upstream and downstream,default 1000 --o output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb ClassAnnotate.pl --- a/ClassAnnotate.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,251 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Chen Tingting -#Email: chentt@big.ac.cn -#Date: 2014/5/13 -#Modified: -#Description: cluster annotate -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","g=s","n=s","r=s","p=s","o=s","t=s","l=s","h"); -if (!(defined $opts{i} and defined $opts{g} and defined $opts{n} and defined $opts{r} and defined $opts{p} and defined $opts{o} and defined $opts{t} and defined $opts{l}) || defined $opts{h}) { #necessary arguments -&usage; -} - -#my %gene; -my %gene1; -open IN,"<$opts{g}"; -open OUT ,">$opts{l}"; -print OUT "#ID\tchr\tstart\tend\tstrand\n"; -my $n=1; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @tmp=split/\t/,$aline; - my $ID; - if ($tmp[2] eq "gene") { - $tmp[0]=~s/Chr\./Chr/; - #$tmp[0]=~s/Chr/chr/; - my @infor=split/;/,$tmp[8]; - for (my $i=0;$i<@infor ;$i++) { - if ($infor[$i]=~/Alias\=(\S+)$/) { - $ID=$1; - last; - } - else { - $ID="unknown$n"; - $n++; - } - } - #$gene{$tmp[0]}{$ID}=[$tmp[3],$tmp[4],$tmp[6]];#$gene{chr}{geneID}=[start,end,strand] - push @{$gene1{$ID}},[$tmp[3],$tmp[4],$tmp[0]]; - print OUT "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n"; - } -} -#while (my $aline=) { -# chomp $aline; -# next if($aline=~/^\#/); -# my @tmp=split/\t/,$aline;#ID chr start end strand -# push @{$gene1{$tmp[0]}},[$tmp[2],$tmp[3],$tmp[1]]; -# #$gene{$tmp[1]}{$tmp[0]}=[$tmp[2],$tmp[3],$tmp[1]]; -#} -close IN; -close OUT; - -my %nat; -open TMP,">$opts{t}"; -print TMP "#NAT_ID\tGene\tStrand\tChr\tGene_start\tGene_end\tAntiGene\tStrand\tChr\tAntiGene_start\tAntiGene_end\tType1\tType2\tNATS1_start\tNATS1_end\tNATS2_start\tNATS2_end\n"; - -open IN,"<$opts{n}"; -my $nat_n=1; -while (my $aline=) { - next if($aline=~/^\#/);#osj LOC_Os05g02659 - LOC_Os01g24200 + trans 1559 1802 660 905 246 100nt - - chomp $aline; - my @arr=split /\t/,$aline; - my ($ns,$ne,$ns2,$ne2)=(0,0,0,0); - if ($arr[11]=~/Nearby/) { - ($ns,$ne)=&nearby($gene1{$arr[1]}[0][0],$gene1{$arr[1]}[0][1],$gene1{$arr[3]}[0][0],$gene1{$arr[3]}[0][1]); - push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs".$nat_n]; - print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns\t$ne\n"; - $nat_n++; - }else{ - $ns=$gene1{$arr[1]}[0][0]+$arr[6]-1; - $ne=$gene1{$arr[1]}[0][0]+$arr[7]-1; - $ns2=$gene1{$arr[3]}[0][1]-$arr[9]+1; - $ne2=$gene1{$arr[3]}[0][1]-$arr[8]+1; - push @{$nat{$gene1{$arr[1]}[0][2]}},[$ns,$ne,$arr[5],$arr[11],"NATs$nat_n"."_1"];#start,end,class1,class2 - push @{$nat{$gene1{$arr[3]}[0][2]}},[$ns2,$ne2,$arr[5],$arr[11],"NATs$nat_n"."_2"]; - print TMP "NATs$nat_n\t$arr[1]\t$arr[2]\t$gene1{$arr[1]}[0][2]\t$gene1{$arr[1]}[0][0]\t$gene1{$arr[1]}[0][1]\t$arr[3]\t$arr[4]\t$gene1{$arr[3]}[0][2]\t$gene1{$arr[3]}[0][0]\t$gene1{$arr[3]}[0][1]\t$arr[5]\t$arr[11]\t$ns\t$ne\t$ns2\t$ne2\n"; - $nat_n++; - } -} -close IN; -close TMP; - -my %repeat; -open IN,"<$opts{'r'}"; -my $first=; -$first=; -$first=; -while (my $aline=) { - chomp $aline; - $aline=~s/^\s+//; - my @tmp=split/\s+/,$aline; - $tmp[4]=~s/chr0/Chr/; - $tmp[4]=~s/chr/Chr/; - push @{$repeat{$tmp[4]}},[$tmp[5],$tmp[6],$tmp[10]];#start,end,class - #print "$tmp[4]\t$tmp[5]\t$tmp[6]\t$tmp[10]\n"; -} -close IN; - -my %phase; -open IN,"<$opts{'p'}"; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @tmp=split/\t/,$aline; - if ($tmp[5]>=25) { - $phase{$tmp[0]}=$tmp[5]; - } -} -close IN; - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; -open IN,"<$filein"; #input file -open OUT,">$fileout"; #output file -while (my $aline=) { - chomp $aline; - if($aline=~/^\#/){ - print OUT "$aline\tPhase\tLong\tRepeatClass\tNatClass1\tNatClass2\tNatID\n"; - next; - } - my @tmp=split/\t/,$aline; - my @inf=split/\:/,$tmp[0]; - my @pos=split/\-/,$inf[1]; - my $chr=$inf[0]; - $chr=~s/Chr0/Chr/; - my $start=$pos[0]; - my $end=$pos[1]; - #=========Repeat============ - my @repeat; - if (defined(@{$repeat{$chr}})) { - my @r_array=sort {$a->[0] <=> $b->[0]} @{$repeat{$chr}}; - for (my $i=0;$i<@r_array ;$i++) { - if ($start<$r_array[$i][0] && $end>$r_array[$i][0]) { - push @repeat,$r_array[$i][2]; - } - elsif($start>$r_array[$i][0] && $start<$r_array[$i][1]){ - push @repeat,$r_array[$i][2]; - - } - elsif($end<$r_array[$i][0]){ - last; - } - else{next;} - } - } - my $repeat; - if (@repeat==0) { - $repeat="\/"; - } - else{ - $repeat=join ";",@repeat; - } - #=========nat=============== - my @nat1;#class 1 - my @nat2;#class 2 - my @nat;#nat ID - #foreach my $chr (keys %nat) { - my @n_array=sort {$a->[0] <=> $b->[0] } @{$nat{$chr}}; - for (my $i=0;$i<@n_array ;$i++) { - if ($start<$n_array[$i][0] && $end>$n_array[$i][0]) { - push @nat1,$n_array[$i][2]; - push @nat2,$n_array[$i][3]; - push @nat,$n_array[$i][4]; - } - elsif($start>$n_array[$i][0] && $start<$n_array[$i][1]){ - push @nat1,$n_array[$i][2]; - push @nat2,$n_array[$i][3]; - push @nat,$n_array[$i][4]; - } - elsif($end<$n_array[$i][0]){ - last; - } - else{next;} - } - #} - - my $nat1; - my $nat2; - my $nat; - if (@nat1==0) { - $nat1="\/"; - } - else{ - $nat1=join ";",@nat1; - } - if (@nat2==0) { - $nat2="\/"; - } - else{ - $nat2=join ";",@nat2; - } - if (@nat==0) { - $nat="\/"; - } - else{ - $nat=join ";",@nat; - } - #========phase============== - my $phase="\/"; - if (defined($phase{$tmp[0]})) { - $phase="phase"; - } - #=========long=============== - my $long="\/"; - if ($tmp[1] eq "\>30nt" and $tmp[2]>=0.5) { - $long="long"; - } - print OUT "$aline\t$phase\t$long\t$repeat\t$nat1\t$nat2\t$nat\n"; -} - -close IN; -close OUT; - -sub nearby{ - my @p=@_; - my ($s,$e)=(0,0); - if ($p[1]<$p[2]) { - $s=$p[1]; - $e=$p[2]; - }else{ - $s=$p[3]; - $e=$p[0]; - } - return ($s,$e); -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -g -n -r -p -t -l -options: --i input file - -g gff file - -n NATs file - -r repeat file - -p phase file --o output file --t nat output file --l genelist output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb DEGseq_2.pl --- a/DEGseq_2.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2009-05-06 -#Modified: -#Description: 删除matched reads -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","outdir=s","column1:i","mark1=s","depth1:i","depth2:i","column2:i","mark2=s","h"); -if (!(defined $opts{i} and defined $opts{outdir} and defined $opts{mark1} and defined $opts{mark2}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $outputdir=$opts{'outdir'}; -unless ($outputdir=~/\/$/) {$outputdir .="/";} -my $column1=defined $opts{column1} ? $opts{column1} : 3; -my $column2=defined $opts{column2} ? $opts{column2} : 4; -my $mark1=$opts{mark1}; -my $mark2=$opts{mark2}; -my $fileout=$outputdir."degseq.R"; - -open OUT,">$fileout"; #output file -#my ($name,$dir); -#$name=basename($filein); -print OUT "library(DEGseq)\n"; -print OUT "geneExpFile <- system.file(package=\"DEGseq\")\n"; -print OUT "geneExpFile<-file.path(\"$filein\")\n"; -print OUT "layout(matrix(c(1,2,3,4,5,6), 3, 2, byrow=TRUE))\npar(mar=c(2, 2, 2,2))\n"; -print OUT "outputdir<-file.path(\"$outputdir\")\n"; -print OUT "geneExpMatrix1 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column1))\n"; -print OUT "geneExpMatrix2 <- readGeneExp(file=geneExpFile, geneCol=1, valCol=c($column2))\n"; -if(defined $opts{'depth1'} && defined $opts{'depth2'}){ -print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",depth1=$opts{depth1},depth2=$opts{depth2},outputDir=outputdir,method=\"MARS\")\n"; -} -else{ -print OUT "DEGexp(geneExpMatrix1=geneExpMatrix1, geneCol1=1, expCol1=c(2), groupLabel1=\"$mark1\",geneExpMatrix2=geneExpMatrix2, geneCol2=1, expCol2=c(2), groupLabel2=\"$mark2\",outputDir=outputdir,method=\"MARS\")\n"; -} -close OUT; - - -system("R CMD BATCH $fileout"); - -wait; - - - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -outdir -column1 -mark1 -column2 -mark2 -depth1 -depth2 -options: --i input file --outdir output file dir --column1 the first column for DEGseq --mark1 the name of the column1 --depth1 depth for the first file,use for normalize --column2 the second column for DEGseq --mark2 the name of the column2 --depth2 depth for the second file,use for normalize - --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb Length_Distibution.pl --- a/Length_Distibution.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,219 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================================== -# Date: -# Title: -# Comment: Program to plot gene structure -# Input: 1. input file of Gene region annotation which format like GenePred -# 2. input file of Transcripts region annotation which format like GenePred -# 3. input file of gene snp detail info -# Output: output file of gene structure graph by html or svg formt -# Test Usage: -#======================================================================================== -#use strict; -my $version=1.00; -use SVG; -use Getopt::Long; -my %opt; -GetOptions(\%opt,"i=s","o=s",,"h"); -if (!(defined $opt{i} and defined $opt{o}) || defined $opt{h}) { -&usage; -} -#===============================Define Attribute========================================== -my %attribute=( - canvas=>{ - 'width'=>1500, - 'height'=>1800 - }, - text=>{ - 'stroke'=>"#000000", - 'fill'=>"none", - 'stroke-width'=>0.5 - #'stroke-width2'=>1 - }, - line=>{ - 'stroke'=>"black", - 'stroke-width'=>1 - }, - font=>{ - 'fill'=>"#000000", - 'font-size'=>12, - 'font-size2'=>10, - 'font-weight'=>'bold', - 'font-family'=>"Arial" - #'font-family2'=>"ArialNarrow-bold" - }, - rect=>{ - 'fill'=>"lightgreen", - 'stroke'=>"black", - 'stroke-width'=>0.5 - }, - readwidth=>0.5 -); -#my $Xscale=600/$length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 -#========================================data============================ -open(IN,"$opt{i}")||die"cannot open the file $opt{i}"; -my @R_length; -my @T_length; -my $R_number=0; -my $T_number=0; -my $R_max=0; -my $T_max=0; - -my $title=; -chomp $title; -my @title=split/\t/,$title; -my @mark=split/\s+/,$title[1]; -my $sample_number=@mark; -while (my $aline=) { - if ($aline=~/^\s/) { - my $T_title=; - chomp $T_title; - while (my $a_aline=) { - chomp $a_aline; - my @temp=split/\t/,$a_aline; - my @number=split/\s+/,$temp[1]; - for (my $i=0;$i<@number ;$i++) { - if ($R_max<$number[$i]) { - $R_max=$number[$i]; - } - } - push @R_length,[$temp[0],@number]; - $R_number++; - } - } - else { - chomp $aline; - my @temp=split/\t/,$aline; - my @number=split/\s+/,$temp[1]; - for (my $i=0;$i<@number ;$i++) { - if ($T_max<$number[$i]) { - $T_max=$number[$i]; - } - } - push @T_length,[$temp[0],@number]; - $T_number++; - } -} -close IN; -print "Tag max: $T_max\nRead max: $R_max\n"; -my $kd_number=5; -##=======================Reads 纵坐标刻度========================== -my $r=1; -my $rr=1; -my $R=$R_max; -while ($R>10) { - $R=$R/10; - $r=$r*10; - $rr++; -} -$R=int($R+0.5); -my $R_xg=$R/$kd_number*$r;#纵坐标一小格大小（一共10格） -my $R_kedu_scale_x=6*$rr;#纵坐标刻度文字 -##=======================Tags 纵坐标刻度========================== -my $t=1; -my $tt=1; -my $T=$T_max; -while ($T>10) { - $T=$T/10; - $t=$t*10; - $tt++; -} -$T=int($T+0.5); -my $T_xg=$T/$kd_number*$t;#纵坐标一小格大小（一共10格） -my $T_kedu_scale_x=6*$tt;#纵坐标刻度文字 - -#############################s#define start coordinate and scale -my $XOFFSET=50; -my $YOFFSET=60; -my $width=800; -my $heigth=800; -my $X_width=600; -#my $height=1600; -#### Starting #### -#新建画布 -my $svg=SVG->new(width=>$width,height=>$heigth); -####坐标轴 -my $axisL=300;#read 纵坐标长度 -my $x_margin = 50; -#=========Reads number setting========================================== -my $Y_R_title=30;#标题的纵向宽度 -my $Y_R_0=$YOFFSET+$axisL+$Y_R_title; -my $X_R_0=$XOFFSET+$x_margin; -my $R_Yscale=$axisL/$R_xg/$kd_number; -my $R_Xscale=$X_width/$R_number/($sample_number+1); -#=====================================Reads Y axis====================== -$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0,'y2',$Y_R_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); -for (my $i=1;$i<$kd_number ;$i++) { - $svg->line('x1',$X_R_0-5,'y1',$Y_R_0-$i*$R_xg*$R_Yscale,'x2',$X_R_0,'y2',$Y_R_0-$i*$R_xg*$R_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - $svg->text('x',$X_R_0-$R_kedu_scale_x,'y',$Y_R_0-$i*$R_xg*$R_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$R_xg); -} -#=====================================Reads X axis====================== -$svg->line('x1',$X_R_0,'y1',$Y_R_0,'x2',$X_R_0+$X_width,'y2',$Y_R_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - -#print "$R_number\t$sample_number\n"; -for ($i=0;$i<$R_number ;$i++) { - for (my $j=1;$j<$sample_number+1 ;$j++) { - my $red=$j/$sample_number*255; - $svg->rect('x',$X_R_0+($j+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0-$R_length[$i][$j]*$R_Yscale,'width',$R_Xscale,'height',$R_length[$i][$j]*$R_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); - } - $svg->text('x',$X_R_0+(1+$sample_number/2+$i*($sample_number+1))*$R_Xscale,'y',$Y_R_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$R_length[$i][0]); -} -#===Reads number title -$svg->text('x',$XOFFSET+400,'y',$YOFFSET,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Reads Length Distribution"); -#===Reads -for (my $i=0;$i<$sample_number ;$i++) { - my $red=($i+1)/$sample_number*255; - $svg->rect('x',$X_R_0+550,'y',$YOFFSET+$Y_R_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); - $svg->text('x',$X_R_0+550+30,'y',$YOFFSET+$Y_R_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); -} -####================================================================================== -#=========================================Tag s -my $Y_T_title=30;#标题的纵向宽度 -my $Y_T_0=$Y_R_0+$axisL+$Y_R_title+50;#length size -my $X_T_0=$XOFFSET+$x_margin; -my $T_Yscale=$axisL/$T_xg/$kd_number; -my $T_Xscale=$X_width/$T_number/($sample_number+1); -#=====================================Tags Y axis====================== -$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0,'y2',$Y_T_0-$axisL,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); -for (my $i=1;$i<$kd_number ;$i++) { - $svg->line('x1',$X_T_0-5,'y1',$Y_T_0-$i*$T_xg*$T_Yscale,'x2',$X_T_0,'y2',$Y_T_0-$i*$T_xg*$T_Yscale,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - $svg->text('x',$X_T_0-$T_kedu_scale_x,'y',$Y_T_0-$i*$T_xg*$T_Yscale+4,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$i*$T_xg); -} -#=====================================Tags X axis====================== -$svg->line('x1',$X_T_0,'y1',$Y_T_0,'x2',$X_T_0+$X_width,'y2',$Y_T_0,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - -#print "$R_number\t$sample_number\n"; -for ($i=0;$i<$T_number ;$i++) { - for (my $j=1;$j<$sample_number+1 ;$j++) { - my $red=$j/$sample_number*255; - $svg->rect('x',$X_T_0+($j+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0-$T_length[$i][$j]*$T_Yscale,'width',$T_Xscale,'height',$T_length[$i][$j]*$T_Yscale,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); - } - $svg->text('x',$X_T_0+(1+$sample_number/2+$i*($sample_number+1))*$T_Xscale,'y',$Y_T_0+15,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',6,'font-family',$attribute{font}{'font-family'},'-cdata',$T_length[$i][0]); -} -#===Reads number title -$svg->text('x',$XOFFSET+400,'y',$Y_R_0+30+$Y_T_title,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',"1",'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Tags Length Distribution"); -#===Reads -for (my $i=0;$i<$sample_number ;$i++) { - my $red=($i+1)/$sample_number*255; - $svg->rect('x',$X_T_0+550,'y',$Y_R_0+30+$Y_T_title+20*$i,'width',15,'height',10,'stroke',"black",'stroke-width',"0.5",'fill',"rgb($red,125,0)"); - $svg->text('x',$X_T_0+550+30,'y',$Y_R_0+30+$Y_T_title+20*$i+10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$i]); -} - - - - -open (OUT,">$opt{o}"); -print OUT $svg->xmlify(); - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -options: --i --o svg output --h help -USAGE -exit(1); -} \ No newline at end of file diff -r 9dcffd531c76 -r b6686462d0cb SampleDEGseqMerge.pl --- a/SampleDEGseqMerge.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: chentt@big.ac.cn -#Date: 2014-05-21 -#Modified: -#Description: merged deg file and total information -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i:s@","mark:s@","f:s","o=s","n=s","h"); -if (!(defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my @filein=@{$opts{'i'}}; -my @mark=@{$opts{'mark'}}; -my $fileout=$opts{'o'}; -my $number=$opts{'n'}; - -my %hash; -open IN,"<$filein[0]"; #input file - -while (my $aline=) { - chomp $aline; - next if($aline=~/^\"/); - my @temp=split/\t/,$aline; - $hash{$temp[0]}=$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1]; -} -close IN; - -for (my $i=1;$i<=$#filein;$i++) { - open IN,"<$filein[$i]"; #input file - - while (my $aline=) { - chomp $aline; - next if($aline=~/^\"/); - my @temp=split/\t/,$aline; - if (!(defined $hash{$temp[0]})) { - print "Not find $temp[0]in sample one!\n"; - next; - } - $hash{$temp[0]} .="\t".$temp[4]."\t".$temp[6]."\t".$temp[7]."\t".$temp[-1]; - } - close IN; -} - -open OUT,">$fileout"; #output file -my $deg_title; -foreach (@mark) { - $deg_title.="log2(Fold_change)\tp_value\tq_value\t".$_."\t"; -} -$deg_title=~s/\s+$//; -my %function; -my $title; -open F,"<$opts{f}"; -while (my $aline=) { - chomp $aline; - if($aline=~/^\#/){ - my $title=$aline; - my @title=split/\t/,$aline; - $title[2+$number].="\t".$deg_title; - $title=join"\t",@title; - print OUT "$title\n"; - next; - } - my @temp=split/\t/,$aline; - $temp[2+$number].="\t".$hash{$temp[0]}; - my $temp=join"\t",@temp; - print OUT "$temp\n"; - -} -close F; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -mark -f -options: --i input file # -i output_score.txt -i output_score.txt -i output_score.txt --mark sample name # -mark sam1_VS_sam2 -mark sam1_VS_sam3 -mark sam2_VS_sam3 --f cluster file --n sample number --o output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb bed2wig.pl --- a/bed2wig.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Chentt -#Email: chentt@big.ac.cn -#Date: 2014/06/25 -#Modified: -#Description: get out larger than cut off sequence -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; -use FileHandle; - -my %opts; -GetOptions(\%opts,"i=s","o=s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $outputdir=$opts{'o'}; -unless ($outputdir=~/\/$/) {$outputdir .="/";} - -##############################get cmap################## -my %cmap; -open IN,"<$opts{i}"; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - $cmap{$temp[0]}=$outputdir.$temp[0]; -} -close IN; -###########################split ma file###################### -my %handle; -foreach (keys %cmap) { - my $name=$cmap{$_}.".sam"; - open $handle{$_},">$name"; -} -open IN,"<$opts{i}"; -while (my $aline=) { - next if($aline=~/^\#/); - chomp $aline; - my @temp=split/\t/,$aline; - $handle{$temp[0]}-> print ($aline,"\n"); - -} -close IN; -foreach (keys %handle) {close $_;} - - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file --o output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb buildInFont.pl --- a/buildInFont.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10599 +0,0 @@ -#!/usr/bin/perl -w -#Author:Li Shengting -#E-mail:lishengting@genomics.org.cn -#Program Date:2002-12-2015:53 -#Last Update:2006-11-14 0:19 -#Describe:add fonts defs to svg for batik -my $ver=1.00; # -use strict; -#use diagnostics; -#use Getopt::Long; - -###################################################################################################################### -# Usage -###################################################################################################################### -my $usage=<<"USAGE"; -#$ver Usage: buildInFont [font-name] -USAGE -my $argvNumber=3; -die $usage if (@ARGV<$argvNumber); -undef($usage); -undef($argvNumber); -###################################################################################################################### -#my %opts; -#GetOptions(\%opts,"a!","b:s"); -###################################################################################################################### -# Constant -###################################################################################################################### -#use constant PI => 3.1415926535897932384626433832795; -###################################################################################################################### -# Variable -###################################################################################################################### -my ($svgF,$fontF,$outSvg)=@ARGV; -my ($font,$defs); -#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -# Begin -#///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -open(F,"$svgF") || die "Can't open $svgF!\n"; -open(O,">$outSvg") || die "Can't write $outSvg!\n"; -if ($fontF eq "x") { - $fontF=$0; -} -$defs=0; -while () { - next if (//); - next if (//); - next if (//); - next if (//); - next if (//); - print O; - if (/\n Analysis Report \n - \n

\n \n Small RNA Analysis Report\n \n

1. Sequence No. and quality

1.1 Sequece No.

-"; - -### raw data no -open IN,"<$config"; -my @files;my @marks; my @rawNo; -while (my $aline=) { - chomp $aline; - my @tmp=split/\t/,$aline; - push @files,$tmp[0]; - - my $no=`less $tmp[0] |wc -l `; - chomp $no; - if ($opts{'format'} eq "fq" || $opts{'format'} eq "fastq") { - $no=$no/4; - } - else{ - $no=$no/2; - } - push @rawNo,$no; - - push @marks,$tmp[1]; -} -close IN; - -### preprocess -unless ($prepath=~/\/$/) { - $prepath .="/"; -} - -my @trimNo;my @collapse; -my $collapsefile=$prepath."collapse_reads.fa"; -open IN,"<$collapsefile"; -while (my $aline=) { - chomp $aline; - ; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $trimNo[$i] +=$lng[$i]; - $collapse[$i] ++; - } - } -} -close IN; - -my @cleanR;my @cleanT; -my $clean=$prepath."collapse_reads_18-40.fa"; -open IN,"<$clean"; -while (my $aline=) { - chomp $aline; - ; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $cleanR[$i] +=$lng[$i]; - $cleanT[$i] ++; - } - } -} -close IN; - -my @filterR;my @filterT; -my $filter=$prepath."collapse_reads_out.fa"; -open IN,"<$filter"; -while (my $aline=) { - chomp $aline; - ; - $aline=~/:([\d|_]+)_x(\d+)$/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $filterR[$i] +=$lng[$i]; - $filterT[$i] ++; - } - } -} -close IN; - - -print OUT " - - -"; -foreach (@marks) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@rawNo) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@trimNo) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@collapse) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@cleanR) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@cleanT) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@filterR) { - print OUT "\n"; -} -print OUT " - - -"; -foreach (@filterT) { - print OUT "\n"; -} -print OUT "\n

	$_
Raw Reads No.	$_
Reads No. After Trimed 3\' adapter	$_
Unique Tags No.	$_
Clean Reads No.	$_
Clean Tags No.	$_
Filter Reads No. $reads count \>3$	$_
Filter Tags No. $reads count \>3$	$_

"; -print OUT "

-Note:
-The raw data file path is: $files[0]
-"; -for (my $i=1;$i<@files;$i++) { - print OUT " $files[$i]
"; -} -print OUT "The collapsed file path is: $collapsefile
-The clean data file path is: $clean
-The filter (remain total reads>3) data file path is: $filter
-

1. Sequence length count

-"; -print OUT "\n"; - -my $length=$prepath."length.html"; -open IN,"<$length"; -while (my $aline=) { - chomp $aline; - print OUT "$aline\n"; -} -close IN; - -print OUT "

Note:
The sequence length data: length file -

-"; - -#### rfam -unless ($rfampath=~/\/$/) { - $rfampath .="/"; -} -unless ($genomepath=~/\/$/) { - $genomepath .="/"; -} -print OUT "

2. Rfam non-miRNA annotation

2.1 Reads count

- - -"; - -my @rfamR; my @rfamT; -my $tag=1; -open IN,"<$dir/rfam_match/rfam_non-miRNA_annotation.txt"; -while (my $aline=) { - chomp $aline; - $tag=0 if($aline=~/tags\s+number/); - next if($aline=~/^\#/); - next if($aline=~/^\s*$/); - my @tmp=split/\s+/,$aline; - if($tag == 1){push @rfamR,[@tmp];} - else{push @rfamT,[@tmp];} -} -close IN; - - -print OUT "\n"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@rfamR;$i++) { - print OUT " - - - "; - for (my $j=1;$j<@{$rfamR[$i]} ;$j++) { - print OUT "\n"; - } -} - -print OUT "\n

RNA Name	$_
$rfamR[$i][0]	$rfamR[$i][$j]

2.2 Tags count

- - - \n"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@rfamT;$i++) { - print OUT " - - - "; - for (my $j=1;$j<@{$rfamT[$i]} ;$j++) { - print OUT "\n"; - } -} -print OUT "\n

RNA Name	$_
$rfamT[$i][0]	$rfamT[$i][$j]

Note:
The rfam mapping results is: $rfampath"; -print OUT "rfam_mapped.bwt

"; - -open IN,"<$dir/genome_match/genome_mapped.bwt"; -my @genome_r_u; -my @genome_r_m; -my @genome_t_u; -my @genome_t_m; -my $tags_map_number=0; -while (my $aline=) { - chomp $aline; - my @temp=split/\t/,$aline; - if ($temp[6]==0) { - $aline=~/:([\d|_]+)_x(\d+)/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $genome_r_u[$i] +=$lng[$i]; - $genome_t_u[$i] ++; - } - } - $tags_map_number++; - } - if ($temp[6]>0) { - $aline=~/:([\d|_]+)_x(\d+)/; - my @lng=split/_/,$1; - for (my $i=0;$i<@lng;$i++) { - if ($lng[$i]>0) { - $genome_r_m[$i] +=$lng[$i]; - $genome_t_m[$i] ++; - } - } - for (my $i=0;$i<$temp[6] ;$i++) { - my $next=; - } - $tags_map_number++; - } -} -close IN; -#

3.1 Reads count

-# -# -print OUT "

3. genome mapping result

- -\n -"; -foreach (@marks) { - print OUT "\n"; -} -print OUT " - - -"; -for (my $i=0;$i<@genome_r_u ;$i++) { - print OUT "\n"; -} - -print OUT " - - -"; -for (my $i=0;$i<@genome_t_u ;$i++) { - print OUT "\n"; -} - -print OUT " - - -"; -for (my $i=0;$i<@genome_r_m ;$i++) { - print OUT "\n"; -} - -print OUT " - - -"; -for (my $i=0;$i<@genome_t_m ;$i++) { - print OUT "\n"; -} - -print OUT "\n

Map	$_
Uniq Map Reads No.	$genome_r_u[$i]
Uniq Map Tags No.	$genome_t_u[$i]
Multiple Map Reads No.	$genome_r_m[$i]
Multiple Map Tags No.	$genome_t_m[$i]

Note:
The genome mapping results is: $genomepath"; -print OUT "genome_mapped.bwt

"; - -my $cluster="$clusterpath/sample_reads.cluster"; -my $cluster_number=`less $cluster |wc -l `; -$cluster_number=$cluster_number-1; -my (%cluster_length,@exp,@rpkm); -my @exp_range=qw(0 $0--10] \(10--100] \(100--1000] \(1000--10000] \(10000--100000] \(100000--**$); -my @rpkm_range=qw(0 \(0--0.25] \(0.25--0.5] \(0.5--1] \(1.0-5.0] \(5--10] \(10--50] \(50--100] \(100--500] \(500--1000] \(1000--**]); - -open IN,"<$cluster"; -while (my $aline=) { - next if($aline=~/^\"/); - chomp $aline; - my @temp=split/\t/,$aline; - my @id=split/:|-/,$temp[0]; - $cluster_length{$id[2]-$id[1]+1}++; - for (my $i=0;$i<@marks ;$i++) { - if ($temp[$i+3] == 0) {$exp[0][$i]++;} - elsif ($temp[$i+3]>0 && $temp[$i+3]<= 10 ){$exp[1][$i]++;} - elsif ($temp[$i+3]>10 && $temp[$i+3]<=100){$exp[2][$i]++;} - elsif ($temp[$i+3]>100 && $temp[$i+3]<=1000){$exp[3][$i]++;} - elsif ($temp[$i+3]>1000 && $temp[$i+3]<=10000){$exp[4][$i]++;} - elsif ($temp[$i+3]>10000 && $temp[$i+3]<=100000){$exp[5][$i]++;} - elsif ($temp[$i+3]>100000){$exp[6][$i]++;} - } -} -close IN; - -my $cluster_rpkm="$clusterpath/sample_rpkm.cluster"; -open IN,"<$cluster_rpkm"; -while (my $aline=) { - next if($aline=~/^\#/); - chomp $aline; - my @temp=split/\t/,$aline; - for (my $i=0;$i<@marks ;$i++) { - if ($temp[$i+3]==0) {$rpkm[0][$i]++;} - elsif($temp[$i+3]>0 && $temp[$i+3]<=0.25){$rpkm[1][$i]++;} - elsif($temp[$i+3]>0.25 && $temp[$i+3]<=0.5){$rpkm[2][$i]++;} - elsif($temp[$i+3]>0.5 && $temp[$i+3]<=1){$rpkm[3][$i]++;} - elsif($temp[$i+3]>1 && $temp[$i+3]<=5){$rpkm[4][$i]++;} - elsif($temp[$i+3]>5 && $temp[$i+3]<=10){$rpkm[5][$i]++;} - elsif($temp[$i+3]>10 && $temp[$i+3]<=50){$rpkm[6][$i]++;} - elsif($temp[$i+3]>50 && $temp[$i+3]<=100){$rpkm[7][$i]++;} - elsif($temp[$i+3]>100 && $temp[$i+3]<=500){$rpkm[8][$i]++;} - elsif($temp[$i+3]>500 && $temp[$i+3]<=1000){$rpkm[9][$i]++;} - else{$rpkm[10][$i]++;} - } -} - -close IN; - -my $cluster_length_file="$clusterpath/cluster_length.txt"; -open LEN,">$cluster_length_file"; -print LEN "\#length\tcluster_number\n"; -foreach my $key (sort keys %cluster_length) { - print LEN "$key\t$cluster_length{$key}\n"; -} -close LEN; -print OUT "

4. cluster result

4.1 Cluster count

- - - - - - - - - -\n

	Merged samples
Tags number	$tags_map_number
Cluster number	$cluster_number

-"; - -print OUT "

4.2 Cluster length

Note:
The clusters length data: length file -

-"; -print OUT "

4.3 Quantify

- - -\n -"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@exp_range;$i++) { - print OUT " - - - "; - for (my $j=0;$j<@marks ;$j++) { - if (!(defined($exp[$i][$j]))) { - print OUT "\n"; - } - else{print OUT "\n";} - } -} -print OUT "\n

Reads Range	$_
$exp_range[$i]	0	$exp[$i][$j]

"; - -print OUT "\n - -\n -"; -foreach (@marks) { - print OUT "\n"; -} -for (my $i=0;$i<@rpkm_range;$i++) { - print OUT " - - - "; - for (my $j=0;$j<@marks ;$j++) { - if (!(defined($rpkm[$i][$j]))) { - print OUT "\n"; - } - else{print OUT "\n";} - } -} -print OUT "\n

RPKM Range	$_
$rpkm_range[$i]	0	$rpkm[$i][$j]

"; - -my $annotate="$annotatepath/sample_c_p.anno"; -my (%posit,%repeat,%nat1,%nat2); -my (@phase,@long,@repeat,@nat); -for (my $j=0;$j<@marks ;$j++) { - $phase[$j]=0; - $long[$j]=0; - $repeat[$j]=0; - $nat[$j]=0; -} - -my $class_anno=1; -open ANNO,"<$annotate"; -while (my $aline=) { - chomp $aline; - my @temp=split/\t/,$aline; - if($aline=~/^\#/){ - if (@temp != 10+@marks) { - $class_anno=0; - } - next; - } - for (my $i=3+@marks+$class_anno;$i<@temp;$i++) { - my @posit=split/\;/,$temp[$i]; - for (my $j=0;$j<@marks ;$j++) { - if ($temp[3+$j]>0) { - $posit{$posit[0]}[$j]++; - } - else{ - if (!(defined($posit{$posit[0]}[$j]))) { - $posit{$posit[0]}[$j]=0; - } - } - } - } - if ($class_anno) { - for (my $j=0;$j<@marks ;$j++) { - if ($temp[3+$j]>0) { - if ($temp[6] eq "phase") { - $phase[$j]++; - } - if ($temp[7] eq "long") { - $long[$j]++; - } - if ($temp[8] ne "\/") { - $repeat[$j]++; - my @rr=split/\;/,$temp[8]; - foreach (@rr) { - $repeat{$_}[$j]++; - } - } - if ($temp[9] ne "\/") { - $nat[$j]++; - my @nn1=split/\;/,$temp[9]; - my @nn2=split/\;/,$temp[10]; - for (my $k=0;$k<@nn1 ;$k++) { - $nat1{$nn1[$k]}[$j]++; - $nat2{$nn2[$k]}[$j]++; - } - } - } - } - } -} -close ANNO; - -print OUT "

5. Cluster Annotate

5.1 Cluster genome position annotate

- - -\n -"; - -foreach (@marks) { - print OUT "\n"; -} -foreach my $key (sort keys %posit) { - print OUT " - - - "; - foreach (@{$posit{$key}}) { - print OUT "\n"; - } -} -print OUT "\n

clusters number	$_
$key	$_

"; -print OUT "

-Note:
-One cluster mybe annotate to multiple genes
-"; - -if ($class_anno) { - print OUT "

5.2 Cluster source mechanism annotate

- - - \n - "; - - foreach (@marks) { - print OUT "\n"; - } - print OUT " - - \n - "; - foreach (@phase) { - print OUT "\n"; - } - - print OUT " - - \n - "; - foreach (@long) { - print OUT "\n"; - } - - print OUT " - - \n - "; - foreach (@repeat) { - print OUT "\n"; - } - - print OUT " - - \n - "; - foreach (@nat) { - print OUT "\n"; - } - print OUT "\n

clusters number	$_
Phase	$_
Long	$_
Repeat	$_
Nat	$_

"; - - print OUT "

- Repeat subclass annotate: - "; - - print OUT " - - \n - "; - foreach (@marks) { - print OUT "\n"; - } - - foreach my $key (sort keys %repeat) { - print OUT " - - - "; - for (my $i=0;$i<@marks ;$i++) { - if (defined($repeat{$key}[$i])) { - print OUT "\n"; - } - else{print OUT "\n";} - } - } - print OUT "\n
Repeat subclass $_
$key $repeat{$key}[$i] 0
"; - - - print OUT "

Repeat subclass	$_
$key	$repeat{$key}[$i]	0

- Nat subclass1 annotate: - "; - - print OUT " - - \n - "; - foreach (@marks) { - print OUT "\n"; - } - foreach my $key (sort keys %nat1) { - print OUT " - - - "; - for (my $i=0;$i<@marks ;$i++) { - if (defined($nat1{$key}[$i])) { - print OUT "\n"; - } - else{print OUT "\n";} - } - } - print OUT "\n
Nat subclass1 $_
$key $nat1{$key}[$i] 0
"; - - print OUT "

Nat subclass1	$_
$key	$nat1{$key}[$i]	0

- Nat subclass2 annotate: - "; - - print OUT " - - \n - "; - foreach (@marks) { - print OUT "\n"; - } - foreach my $key (sort keys %nat2) { - print OUT " - - - "; - for (my $i=0;$i<@marks ;$i++) { - if (defined($nat2{$key}[$i])) { - print OUT "\n"; - } - else{print OUT "\n";} - } - } - print OUT "\n
Nat subclass2 $_
$key $nat2{$key}[$i] 0
"; - print OUT "

Nat subclass2	$_
$key	$nat2{$key}[$i]	0

- Note:
- One cluster mybe annotate to multiple repeats or nats
- "; -} -else { - print OUT "

5.2 Cluster source mechanism annotate

-
Do not do source mechanism annotate
"; - -} - -print OUT "

6. Graph of Clusters of all samples

\n"; - -my $plot=$plotpath."cluster.html"; -open IN,"<$plot"; -while (my $aline=) { - chomp $aline; - print OUT "$aline\n"; -} -close IN; - - -if ($deg_tag) { - my $deg_file=`ls $degpath`; - chomp $deg_file; - my @deg_file=split/\n/,$deg_file; - my %deg; - foreach (@deg_file) { - my $output="$degpath/$_/output_score.txt"; - open IN,"<$output"; - $deg{$_}[0]=0; - $deg{$_}[1]=0; - $deg{$_}[2]=0; - while (my $aline=) { - next if ($aline=~/^\"/); - chomp $aline; - my @temp=split/\t/,$aline; - if ($temp[9] eq "TRUE") { - $deg{$_}[0]++; - if ($temp[4] >0) { - $deg{$_}[1]++; - } - if ($temp[4] <0) { - $deg{$_}[2]++; - } - } - } - close IN; - } - - print OUT "

7. DEG

- - - \n - \n - \n - \n - "; - - foreach my $key (sort keys %deg) { - print OUT " - - - "; - for (my $i=0;$i<@{$deg{$key}} ;$i++) { - print OUT "\n"; - } - } - print OUT "\n

Genes number	DEG	UP	DOWN
$key	$deg{$key}[$i]

"; -} -else{ - print OUT "

7. DEG

-
Do not do DE clusters
"; -} - -print OUT " - - -"; -close OUT; - - - - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -o -options: --i --format --o output file --h help -USAGE -exit(1); -} diff -r 9dcffd531c76 -r b6686462d0cb install_DEG.R --- a/install_DEG.R Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -source("http://bioconductor.org/biocLite.R") -biocLite("DEGseq") diff -r 9dcffd531c76 -r b6686462d0cb matching.pl --- a/matching.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","g=s","index:s","v:i","p:i","r:s","o=s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; -unless ($fileout=~/\/$/) {$fileout.="/";} -my $genome=$opts{'g'}; -my $mis=defined $opts{'v'}? $opts{'v'} : 0; -my $hits=defined $opts{'r'}? $opts{'r'} : 25; -my $index=defined $opts{'index'} ? $opts{'index'} : ""; -my $threads=defined $opts{'p'} ? $opts{'p'} : 1; - - -#my $time=time(); -#my $mapdir=$fileout."/genome_match_".$time; -my $mapdir=$fileout."/genome_match"; -mkdir $mapdir; -chdir $mapdir; -###check genome index -if (-s $index.".1.ebwt") { -}else{ - `bowtie-build $genome genome`; - $index="genome"; -} - -### genome mapping -`bowtie -v $mis -f -p $threads -m $hits -a --best --strata $index $filein --al genome_mapped.fa --un genome_not_mapped.fa > genome_mapped.bwt 2> run.log`; - -#`convert_bowtie_to_blast.pl genome_mapped.bwt genome_mapped.fa $genome > genome_mapped.bst`; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file# input reads fasta/fastq file --g input file# genome file --index file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null --v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; - --p/--threads number of alignment threads to launch (default: 1) - --r int a read is allowed to map up to this number of positions in the genome - default is 25 - --o output directory - --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb nibls.pl --- a/nibls.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,319 +0,0 @@ -#!/usr/bin/perl -##################################################################################################### -#LocusPocus is a free script, it is provided with the hope that you will enjoy, you may freely redistribute it at will. We would be greatful if you would keep these acknowledgements with it. -# -# Dan MacLean -# dan.maclean@sainsbury-laboratory.ac.uk -# -# This program is free academic software; academic and non-profit -# users may redistribute it freely. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# This software is released under GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 -# see included file GPL3.txt -# -# - - -###Dont forget you will need ... -##################################################################################################### -# Boost::Graph -#Copyright 2005 by David Burdick -# Available from http://search.cpan.org/~dburdick/Boost-Graph-1.2/Graph.pm -#Boost::Graph is free software; you can redistribute it and/or modify it under the same terms as Perl itself. -##################################################################################################### - - - -use strict; -use warnings; -use Boost::Graph; -use Getopt::Long; - - -my $usage = "usage: $0 -f GFF_FILE [options]\n\n -m minimum inclusion distance (default 5)\n -c clustering coefficient (default 0.6) -b buffer between graphs (default 0) -k sample mark -o output file -t temp output file\n"; - -my $gff_file ; -my $min_inc = 5; -my $clus = 0.6; -my $buff = 0; -my $output_file; -my $temp; -my $mark; - -GetOptions( - - 'c=f' => \$clus, - 'm=i' => \$min_inc, - 'f|file=s' => \$gff_file, - 'b=i' => \$buff, - 'o=s' => \$output_file, - 't=s' => \$temp, - 'k=s' => \$mark -) ; - - -die $usage unless $gff_file; - - -my $starttime = time; -warn "started $starttime\n"; - -## load in data -my %molecules; # stores starts and ends of srnas -open GFF, "<$gff_file"; - -while (my $entry = ){ - - chomp $entry; - next if($entry=~/^\#/); - my @data = split(/\t/,$entry); - my $chr=shift @data; - my $strand=shift @data; - my $start=shift @data; - my $end=shift @data; -# my $length1=$end-$start+1; -# if ($length1>30) { -# $length1=40; -# } - my $total; - for (my $s=0;$s<@data ;$s++) { - $total+=$data[$s]; - } - push @data,$total; -# push @data,$length1; -# if (defined $molecules{$chr}{$start}{$end}{$strand}) { -# my @old_data=split(/;/,$molecules{$chr}{$start}{$end}{$strand}); -# for (my $i=0;$i<$#old_data ;$i++) { -# $data[$i]+=$old_data[$i]; -# } -# } - my $data=join ";",@data; - $molecules{$chr}{$start}{$end}{$strand} = $data;#chr#start#end#strand#add Tags information - #print "$chr\t$start\t$end\n"; -} - -close GFF; - -warn "Data loaded...\nBuilding graphs and finding loci\nPlease be patient, this can take a while...\n"; - -my @sample=split/\#/,$mark; -$mark=join"\"\t\"",@sample; -open OUT, ">$output_file"; -print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n"; -open CLUSTER,">$temp"; -print CLUSTER "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n"; -foreach my $chromosome (keys %molecules){ - my $g = new Boost::Graph(directed=>0); - my @starts = keys(%{$molecules{$chromosome}} ); - @starts = sort {$a <=> $b} @starts; - - while (my $srna_start = shift @starts){ ## work from left most sRNA to right most, add to graph if they close enough - - - foreach my $srna_end (keys %{$molecules{$chromosome}{$srna_start}}){ - - - ###use new graph if the next srna is too far away from this one.. - if(defined $starts[0] and $srna_end + $min_inc + $buff < $starts[0]){ - - - ##dump the info from the old graph - if (scalar(@{$g->get_nodes()}) > 2){ - - my $cluster_coeff = get_cc($g); - if ($cluster_coeff >= $clus){ - dump_locus($g, $cluster_coeff); - } - } - - - $g = new Boost::Graph(directed=>0); - - } - - foreach my $e (keys %{$molecules{$chromosome}{$srna_start}}){ ### extra bit because all loci with same start and different end overlap by definition. but are not collected by main search below - - unless ($e eq $srna_end){ - my $sn = $chromosome. ':' . $srna_start . ':' . $srna_end; ## turn coordinate of sRNA inro a node name - my $en = $chromosome. ':' . $srna_start . ':' . $e; - $g->add_edge(node1=>"$sn", node2=>"$en", weight=>'1'); - } - - } - - foreach my $start (@starts){ ##build graph of overlaps - my $new = 0; - last if $start - $min_inc > $srna_end; - if ($start - $min_inc <= $srna_end){ - - my $start_node = $chromosome . ':' . $srna_start . ':' . $srna_end; - foreach my $end (keys %{$molecules{$chromosome}{$start}}){ - - my $end_node = $chromosome . ':' . $start . ':' . $end; - $g->add_edge(node1=>"$start_node", node2=>"$end_node", weight=>'1'); - } - - } - } - } - - if (!(defined $starts[0])) { - ##dump the info from the last graph - if (scalar(@{$g->get_nodes()}) > 2){ - - my $cluster_coeff = get_cc($g); - if ($cluster_coeff >= $clus){ - dump_locus($g, $cluster_coeff); - } - } - } - } -} - -warn "Loci printed\nFinished\n"; - -my $endtime = time; - -my $elapsed = $endtime - $starttime; - -warn "Time elapsed = $elapsed s\n"; -close OUT; -close CLUSTER; -######################################################################################### -sub get_cc{ ## do cluster coeff calculation. No useful method anyway so self implemented NB, this is an undirected graph so k is n(n-1)/2 - - my $graph = shift; - - my @component = @{$graph->get_nodes()}; #number of nodes - my @clustering_coefficients; - - foreach my $vertex (@component) - { - - my @neighbours = @{$graph->neighbors($vertex)}; - - my %edges_in_graph; - - my $n = @neighbours; #n = the number of neighbours - my $k = ($n * ($n - 1))/2; #k = total number of possible connections - - my $e= 0; #actual number of connections within sub-graph - - foreach my $neighbour (@neighbours) - { - foreach my $neighbour_2 (@neighbours) - { - my $edge1 = "$neighbour\t$neighbour_2"; - my $edge2 = "$neighbour_2\t$neighbour"; - unless (exists $edges_in_graph{$edge1} or exists $edges_in_graph{$edge2}) - { - if ($graph->has_edge($neighbour, $neighbour_2) or $graph->has_edge($neighbour_2, $neighbour)) - { - ++$e; - $edges_in_graph{$edge1}=1; - $edges_in_graph{$edge2}=1; - } - } - } - } - - if ($k >= 1) - { - my $c = $e / $k; - push @clustering_coefficients, $c; - } - else {push @clustering_coefficients, '0';} - } - - my $graph_n = scalar(@clustering_coefficients); - my $graph_cc = 0; - foreach my $cc (@clustering_coefficients){ - - $graph_cc = $graph_cc + $cc; - - } - $graph_cc = $graph_cc / $graph_n; - - return $graph_cc; -} - -############################################################################################################ - -sub dump_locus{ - - my $g = shift; - my $cc = shift; - my $chr; - my $start = 1000000000000000000000000000000000000000000000; - my $end = -1; - my @sample; - my @tag; - foreach my $node (@{$g->get_nodes()}){ - - $node =~ m/^(\S+):(\d+):(\d+)$/; - my $c=$1; - my $s=$2; - my $e=$3; - # my @data; - foreach my $str (keys %{$molecules{$c}{$s}{$e}}) { - my @data=split(/;/,$molecules{$c}{$s}{$e}{$str}); - push @tag,($s.",".$e.",".$str.",".$data[-1]); -# for (my $i=0;$i<$#old_data ;$i++) { -# $data[$i]+=$old_data[$i]; -# } - my $length=$e-$s+1; - if ($length>30) { - $length=40; - } - push @data,$length; - my $data=join ";",@data;#sample_exp/total_exp/length; - push @sample,$data; - } - - $chr = $c; - $start = $s if $s < $start; - $end = $e if $e > $end; - } - my $tag=join";",@tag; - my $tag_number=@tag; - my ($max_length,$max_p,@cluster_exp)=Max_length(\@sample); - if ($max_length==40) { - $max_length="\>30"; - } - my $cluster_exp=join"\t",@cluster_exp; - my $gff = $chr."\:$start\-$end\t".$max_length."nt\t".$max_p."\t" . $cluster_exp; - print CLUSTER "$chr\:$start\-$end\t$max_length"."nt\t$max_p\t$tag_number\t$tag\n"; - print OUT $gff, "\n"; -} - -sub Max_length{ - my @exp=@{$_[0]}; - my %sample_length; - my $total_exp; - my @each; - for (my $i=0;$i<=$#exp ;$i++) { - my @tag=split/;/,$exp[$i]; - my $length=pop(@tag); - my $exp=pop(@tag); - $sample_length{$length}+=$exp; - $total_exp+=$exp; - for (my $j=0;$j<=$#tag ;$j++) { - $each[$j]+=$tag[$j]; - } - } - my $max=0; - my $max_key; - foreach my $key (sort keys %sample_length) { - my $p=$sample_length{$key}/$total_exp; - if ($p>$max) { - $max=$p; - $max_key=$key; - } - $sample_length{$key}=sprintf("%.2f",$p); - } - return($max_key,$sample_length{$max_key},@each); -} diff -r 9dcffd531c76 -r b6686462d0cb phased_siRNA.pl --- a/phased_siRNA.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,254 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -#use Math::Cephes qw(:hypergeometrics); - -my %opts; -GetOptions(\%opts,"i=s","o=s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; - -open IN,"<$filein"; #input file -open OUT,">$fileout"; #output file - -while (my $aline=) { - chomp $aline; - if ($aline=~/^\#/) { - print OUT $aline,"\tp-value\n"; - next; - } - my @tmp=split/\t/,$aline; - my @pos=split/:|-/,$tmp[0]; - $tmp[1]=~s/nt//; - my $pv=&phase($tmp[1],$pos[1],$pos[2],$tmp[4]); - - print OUT $aline,"\t",$pv,"\n"; -} -close IN; -close OUT; - -sub phase{ - my ($tagL,$start,$end,$tags)=@_; - my @tmp=split/\;/,$tags; - my %tag; - for (my $i=0;$i<@tmp;$i++) { - my @aa=split/\,/,$tmp[$i]; - next if($aa[1]-$aa[0]+1 != $tagL); -# $tag{$aa[0].",".$aa[2]}+=$aa[3] if($aa[2] eq "+"); -# $tag{($aa[1]).",".$aa[2]}+=$aa[3] if($aa[2] eq "-"); - $tag{$aa[0]}+=$aa[3] if($aa[2] eq "+"); - $tag{($aa[1]+3)}+=$aa[3] if($aa[2] eq "-"); - } - - my $pv=&pvalue2(\%tag,$tagL,$start,$end); - - return $pv; -} - -sub pvalue2{ - my ($tag,$tagL,$start,$end)=@_; - - my $p=1; my $pp=1; - foreach my $ccs(keys %{$tag}){ - my $n=0; - my $k=0; - my $K=0; - my $N=0; - - my $cor= $ccs; - my $ss=$cor; - my $ee=($cor+$tagL*10-1)<$end ? $cor+$tagL*10-1 : $end; - - my $max=0; - for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand - { - my $x=$i; - if (defined $$tag{$x}) - { - if ($max<$$tag{$x}) {$max=$$tag{$x};} - $n +=$$tag{$x}; - $N++; - } - } - for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand - { - my $x=$i; - if (defined $$tag{$x}) - { - $k +=$$tag{$x}; - $K++; - } - } - - - return $p if($K<3); - return $p if($max/$n>0.8); - - my $pn=0; - next if($n==$k); - $pn=10*$k/($n-$k)+1; - $pn = $pn ** ($K-2); - $pn = log($pn); - if ($p<$pn) { - $p=$pn; - } - - } - - return $p; - -} - -sub pvalue{ - my ($tag,$tagL,$start,$end)=@_; - - my $p=1; - foreach my $ccs(keys %{$tag}){ - my $n=-1; - my $k=-1; - - my ($cor, $str)=split(/,/, $ccs); - if ($str eq "+") # small RNAs on the Watson strand - { - my $ss=$cor; - my $ee=($cor+$tagL*11-1)<$end ? $cor+$tagL*11-1 : $end; - for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand - { - my $x=$i.","."+"; - if (defined $$tag{$x}) - { - $n=$n+1; - } - } - for (my $i=$ss; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand - { - my $x=$i.","."+"; - if (defined $$tag{$x}) - { - $k=$k+1; - } - } - - for (my $j=$ss-2; $j<=$ee-2; $j++) # calculate n on the antisense strand - { - my $x=$j.","."-"; - if (defined $$tag{$x}) - { - $n=$n+1; - } - } - - for (my $j=$ss+$tagL-2; $j<=$ee-2; $j=$j+$tagL) # calculate k on the antisense strand - { - my $x=$j.","."-"; - if (defined $$tag{$x}) - { - $k=$k+1; - } - } - } - - elsif ($str eq "-") # small RNAs on the Crick strand - { - my $ee=$cor; - my $ss=$cor-$tagL*11+1> $start ? $cor-$tagL*11+1 : $start; - for (my $i=$ss; $i<=$ee; $i++) # calculate n on the sense strand - { - my $x=$i.","."-"; - if (defined $$tag{$x}) - { - $n=$n+1; - } - } - for (my $i=$ss+$tagL-1; $i<=$ee; $i=$i+$tagL) # calculate k on the sense strand - { - my $x=$i.","."-"; - if (defined $$tag{$x}) - { - $k=$k+1; - } - } - - for (my $j=$ss+2; $j<=$ee+2; $j++) # calculate n on the antisense strand - { - my $x=$j.","."+"; - if (defined $$tag{$x}) - { - $n=$n+1; - } - } - for (my $j=$ss+2; $j<=$ee+2; $j=$j+$tagL) # calculate k on the antisense strand - { - my $x=$j.","."+"; - if (defined $$tag{$x}) - { - $k=$k+1; - } - } - } - - next if($k<3); - - my $pn=0; my $N=$tagL*11*2-1; my $M=21; - for (my $w=$k; $w<=$M; $w++) # calculate p-value from n and k - { - my $c=1; - my $rr=1; - my $rw=1; - - for (my $j=0; $j<=$w-1; $j++) - { - $c=$c*($M-$j)/($j+1); - } - for (my $x=0; $x<=$n-$w-1; $x++) - { - $rr=$rr*($N-$M-$x)/($x+1); - } - for (my $y=0; $y<=$n-1; $y++) - { - $rw=$rw*($y+1)/($N-$y); - } - my $pr=$c*$rr*$rw; - - $pn=$pn+$pr; - } - - $p=$pn<$p ? $pn :$p; - - if ($p<0.001) #select and output small RNA clusters with p<0.001 - - { - - return $p; - - } - - } - return $p; -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file --o output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb quantify.pl --- a/quantify.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: chentt -#Email: -#Date: 2012-4-6 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","o=s","d=s","h"); -if (!(defined $opts{i} and defined $opts{d} and defined $opts{o}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $input=$opts{'i'}; -my $output=$opts{'o'}; -my $depth=$opts{'d'}; - -open (IN,"<$input")||die"$!"; -open OUT,">$output"; -#my @Total=qw(15797079 18042650 17455254 17295526 18791753 16719596 15150009 18451484 17402501 17729362 19347595 17518516 15699663 16589265 15442892 14012264 14190746 17280260 13213117 12390121 14874304 ); -my @Total=split/\,/,$depth; -#print OUT "#clusterID\tmajor_length\tpercent\n"; -while (my $aline=) { - chomp $aline; - if ($aline=~/^\"/){ - my @title=split/\t/,$aline; - for (my $i=0;$i<@title ;$i++) { - $title[$i]=~s/^\"(\S+)\"$/$1/; - } - my $title=join "\t",@title; - print OUT "\#$title\n"; - next; - } - my @temp=split/\t/,$aline; - print OUT "$temp[0]\t$temp[1]\t$temp[2]"; - my @id=split/:/,$temp[0]; - my @posi=split/-/,$id[1]; - for (my $i=3;$i<@temp;$i++) { - my $rpkm=sprintf("%.2f",$temp[$i]/($posi[1]-$posi[0]+1)/$Total[$i-3]*1000000000); - print OUT "\t$rpkm"; - } - print OUT "\n"; -} -close IN; -close OUT; - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -h -options: --i input cluster file --o output file --d depth --h help -USAGE -exit(1); -} \ No newline at end of file diff -r 9dcffd531c76 -r b6686462d0cb rfam.pl --- a/rfam.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2013/7/19 -#Modified: -#Description: -my $version=1.00; - -use strict; -use Getopt::Long; -use File::Basename; - -my %opts; -GetOptions(\%opts,"i=s","ref=s","index:s","v:i","p:i","o=s","h"); -if (!(defined $opts{i} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; -unless ($fileout=~/\/$/) {$fileout.="/";} -my $rfam=$opts{'ref'}; -my $mis=defined $opts{'v'}? $opts{'v'} : 0; -my $index=defined $opts{'index'} ? $opts{'index'} : ""; -my $threads=defined $opts{'p'} ? $opts{'p'} : 1; - - -#my $time=time(); - -#my $mapdir=$fileout."/rfam_match_".$time; -my $mapdir=$fileout."/rfam_match"; -mkdir $mapdir; -chdir $mapdir; -###check genome index -if (-s $index.".1.ebwt") { -}else{ - &checkACGT($rfam); - `bowtie-build $rfam rfam`; - $index="rfam"; -} - -#chdir "rfam_match_1397022331"; -### genome mapping -`bowtie -v $mis -f -p $threads -k 1 $index $filein --al rfam_mapped.fa --un rfam_not_mapped.fa > rfam_mapped.bwt 2> run.log`; - -sub checkACGT{ - my $string; - open IN,"<$rfam"; - while (my $aline=) { - if ($aline!~/^>/) { - $aline=~s/U/T/gi; - } - $string .=$aline; - } - close IN; - $rfam=basename($rfam); - open OUT, ">$rfam"; - print OUT $string; - close OUT; -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file# input reads fasta/fastq file --ref input file# rfam file, which do not contain miRNAs --index file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null --v report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; - --p/--threads number of alignment threads to launch (default: 1) - --o output directory - --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb sRNA_plot.pl --- a/sRNA_plot.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,411 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================================== -# Date: -# Title: -# Comment: Program to plot gene structure -# Input: 1. -# 2. -# 3. -# Output: output file of gene structure graph by html or svg formt -# Test Usage: -#======================================================================================== -#use strict; -my $version=1.00; -use SVG; -use Getopt::Long; -my %opt; -GetOptions(\%opt,"g=s","l=s","span=s","c=s","o=s","out=s","cen:s","mark=s","h"); -if (!( defined $opt{o}) || defined $opt{h}) { -&usage; -} -my $span=$opt{span}; -#my $sample_cloumn=$opt{n}; -my $mark=$opt{mark}; -my @mark=split/\#/,$mark; -my $genelist=$opt{g}; -#===============================Define Attribute========================================== -my %attribute=( - canvas=>{ - 'width'=>1500, - 'height'=>1800 - }, - text=>{ - 'stroke'=>"#000000", - 'fill'=>"none", - 'stroke-width'=>0.5 - }, - line=>{ - 'stroke'=>"black", - 'stroke-width'=>1 - }, - csv=>{ - 'stroke'=>"red", - 'stroke-width'=>0.5 - }, - exon=>{ - 'stroke'=>"black", - 'stroke-width'=>1 - }, - intron=>{ - 'stroke'=>"black", - 'stroke-width'=>1.5 - }, - font=>{ - 'fill'=>"#000000", - 'font-size'=>12, - 'font-size2'=>10, - #'font-weight'=>'bold', - 'font-family'=>"Arial" - #'font-family'=>"ArialNarrow-bold" - }, - rect=>{ - 'fill'=>"lightgreen", - 'stroke'=>"black", - 'stroke-width'=>0.5 - }, - readwidth=>0.5 -); -#############################s#define start coordinate and scale -open(TXT,">$opt{out}"); -open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; -my %length; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - $temp[0]=~s/^c/C/; - $length{$temp[0]}=$temp[1]; -} -close LENGTH; -#--------------------------------------------------------------- -open(GENE,"$opt{g}")||die"cannot open the file $opt{g}"; -my %genelist; -while (my $aline=) { - chomp $aline;#LOC_Os01g01280 Chr1 133291 134685 + - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - if ($temp[1]=~/^Chr(\d)$/) { - $temp[1]="Chr0$1"; - } - push @{$genelist{$temp[1]}},[$temp[0],$temp[2],$temp[3]]; - -} -close GENE; -#my %have_gene; -#foreach my $chr (sort keys %genelist) { -# my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; -# my $start=$genelist[0][1]; -# my $end=$genelist[0][2]; -# for (my $i=0;$i<@genelist ;$i++) { -# if ($gene) { -# } -# } -#} - -my %gene_desity; -foreach my $chr (sort keys %genelist) { - my @genelist=sort{$a->[1] <=> $b->[1]}@{$genelist{$chr}}; - for (my $i=0;$i<@genelist ;$i++) { - my $start=int($genelist[$i][1]/$span); - my $end=int($genelist[$i][2]/$span); - #my @t_rpkm=split/\t/,$target_rpkm{$genelist[$i][0]}; - if ($start==$end) { - $gene_desity{$chr}[$start]++; - } - else{ - for (my $k=$start;$k<=$end ;$k++) { - $gene_desity{$chr}[$k]++; - } - } - } -} -#------------------------------------------region_gene_number------------------------- -my $max_gene_number=0; -my $total=0; -foreach my $chr (sort keys %genelist) { - for (my $i=0;$i<@{$gene_desity{$chr}} ;$i++) { - if (!(defined($gene_desity{$chr}[$i]))) { - $gene_desity{$chr}[$i]=0; - } - if ($gene_desity{$chr}[$i]>$max_gene_number) { - $max_gene_number=$gene_desity{$chr}[$i]; - #print "$gene_desity{$chr}[$i]\n"; - } - #print TXT "$i\t$gene_desity[$i]\n"; - $total+=$gene_desity{$chr}[$i]; - #print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; - } -} -#print "Gene max:$max_gene_number\ntotal:$total\n"; - -#--------------------------------------------------------------- -my %centromere; -if (defined($opt{cen})) { - open CEN,"$opt{cen}"; - while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - $temp[0]=~s/^c/C/; - $centromere{$temp[0]}[0]=$temp[1]; - $centromere{$temp[0]}[1]=$temp[2]; - } - close CEN; -} - -#--------------------------------------------------------------- -my $max_length=0; -foreach my $chr (keys %length) { - if ($max_length<$length{$chr}) { - $max_length=$length{$chr}; - } - print "$chr\n"; -} -#====================================cluster data======================================= -open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; -my %cluster; -my %cluster_density; -#my @sample=qw(39B3 3PA3 3LC3); -my @cluster_non_add; -while (my $aline=) { - next if($aline=~/^\#/); - chomp $aline;##Chr MajorLength Percent end 19B1 - my @temp=split/\t/,$aline; - my @ID=split/\:/,$temp[0]; - my @posi=split/\-/,$ID[1]; - my @all_rpkm=@temp; - shift @all_rpkm; - shift @all_rpkm; - shift @all_rpkm; -# for (my $s=0;$s<@all_rpkm ;$s++) {#log transfer -# $all_rpkm[$s]=log2($all_rpkm[$s]); -# } - push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],@all_rpkm];#ID start end rpkm(19B1,1PA1,1LC1); -} -close CLUSTER; -my %max_cluster; -my $chr_number=0; -print "@mark\n$mark\n"; -foreach my $chr (sort keys %cluster) { - for (my $i=0;$i<@mark ;$i++) { - $max_cluster{$chr}[$i]=0; - } - $chr_number++; -} -foreach my $chr (sort keys %cluster) { - @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; - for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { - for (my $s=0;$s<@mark;$s++) { - if ($cluster{$chr}[$i][3+$s]>$max_cluster{$chr}) { - $max_cluster{$chr}[$s]=$cluster{$chr}[$i][3+$s]; - } - } - } - -} -foreach my $chr (sort keys %max_cluster) { - for (my $s=0; $s<@mark;$s++) { - # print "$max_cluster{$chr}[$s]\n"; - } -} -#--------------------------------------------------------------------------------------- -foreach my $chr(keys %cluster) { - for(my $i=0;$i<$#{$cluster{$chr}};$i++) { - my $start=int($cluster{$chr}[$i][1]/$span); - my $end=int($cluster{$chr}[$i][2]/$span); - if ($start==$end) { - for (my $s=0;$s<@mark ;$s++) { - $cluster_density{$chr}[$start][$s]+=$cluster{$chr}[$i][3+$s]; - } - - } - else{ - for (my $m=$start;$m<=$end ;$m++) { - for (my $s=0;$s<@mark ;$s++) { - $cluster_density{$chr}[$m][$s]+=$cluster{$chr}[$i][3+$s]; - } - } - } - } -} -my %max_cluster_density; -my $max_all_density=0; -foreach my $chr (sort keys %cluster) {# - for (my $s=0;$s<@mark ;$s++) { - for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { - $max_cluster_density{$chr}[$s]=0; - } - } - -} -foreach my $chr (sort keys %cluster_density) { - print "$#{$cluster_density{$chr}}\n"; - for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { - print TXT "$chr\t$k"; - for (my $s=0;$s<@mark;$s++) { - if (!(defined($cluster_density{$chr}[$k][$s]))) { - $cluster_density{$chr}[$k][$s]=0; - } - if ($cluster_density{$chr}[$k][$s]>$max_cluster_density{$chr}[$s]) { - $max_cluster_density{$chr}[$s]=$cluster_density{$chr}[$k][$s]; - } - if ($cluster_density{$chr}[$k][$s]>$max_all_density) { - $max_all_density=$cluster_density{$chr}[$k][$s]; - } - print TXT "\t$cluster_density{$chr}[$k][$s]"; - } - print TXT "\n"; - } -} -print "max density: $max_all_density\n"; -#-------------------------------------------------------------------- -my $top_margin=30; -my $tail_margin=30; -my $XOFFSET=50; -my $YOFFSET=60; -my $chr_length=600; -my $Xscale=$chr_length/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 -#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 -#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 -#========================================New canvas============================ -#### Starting #### -#新建画布 -my $width=1000; -my $heigth=100+130*$chr_number; -my $svg=SVG->new(width=>$width, height=>$heigth); -#画图起始点 -my $canvas_start_x=$XOFFSET; -my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺画线 -my $canvas_start_y=$YOFFSET; -my $canvas_end_y=$YOFFSET; -my $chr_heigth=$heigth-$YOFFSET-$tail_margin; -print "chr number:$chr_number\n"; -my $one_chr_heigth=$chr_heigth/$chr_number; -my $Yscale=($one_chr_heigth-15)/$max_all_density; -#my $chr_width=$YOFFSET; -#my $chr_start_y; -#my $chr_end_y; -#my $Yscale=0.01; -#=======================================title of the graph=============================== -#my $span_k=$span/1000; -#$svg->text('x',$width/2,'y',$YOFFSET-20,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',15,'font-family',$attribute{font}{'font-family'},'-cdata',"Clusters rpkm/"."$span_k"."kb Distribution"); -#=======================================the top max chr line============================= -$svg->line(id=>'l1',x1=>$canvas_start_x,y1=>$canvas_start_y,x2=>$canvas_end_x,y2=>$canvas_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); -$long_scale=int ($max_length/10);#十等分大刻度 -#大坐标刻度 -for ($i=0;$i<=10;$i++) { - my $long_x_start=$XOFFSET+$long_scale*$i*$Xscale; - my $long_x_end=$long_x_start; - my $long_y_start=$YOFFSET; - my $long_y_end=$YOFFSET-5; - $svg->line('x1',$long_x_start,'y1',$long_y_start,'x2',$long_x_end,'y2',$long_y_end,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - my $Bscale=$long_scale*$i; - my $cdata=int ($Bscale/1000000); - $svg->text('x',$long_x_start,'y',$long_y_start-10,'style','fill:black;text-anchor:middle','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$cdata."M"); -} -#========================================================================================= -my $cc=1; -foreach my $chr (sort keys %length) { - my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; - my $chr_start_x=$XOFFSET; - my $chr_start_y=$YOFFSET+$cc*$one_chr_heigth; - my $chr_end_y=$chr_start_y; - #$chr_start_y+=$chr_width; - #$chr_end_y+=$chr_width; -# for (my $i=0;$i<@{$gene_desity{$chr}};$i++) { -# print "$chr\t$i\t$gene_desity{$chr}[$i]\n"; -# my $red=$gene_desity{$chr}[$i]/$max_gene_number*255; -# my $green=$gene_desity{$chr}[$i]/$max_gene_number*255; -# print "$red\t$green\t0\n"; -# $svg->rect('x',$chr_start_x+$i*$span*$Xscale,'y',$chr_start_y,'width',$span*$Xscale,'height',8,'stroke',"rgb($red,$green,0)",'stroke-width',0.1,'fill',"rgb($red,$green,0)"); -# } - - $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); - my $m_length=$length{$chr}%1000000; - $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$m_length."M"); - - - if (defined($centromere{$chr}[0])) { - $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y-2,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); - } - for (my $s=0;$s<@mark ;$s++) { - for (my $i=0;$i<$#{$cluster_density{$chr}}-1 ;$i++) { - #if ($cluster_density{$chr}[$i]*$Yscale>40) { - #$cluster_density{$chr}[$i]=40/$Yscale; - #$svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); - #} - #print "$i\t$cluster_density{$chr}[$i][$s]\t$cluster_density{$chr}[$i+1][$s]\n"; - my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; - my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; - my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i][$s]*$Yscale; - my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][$s]*$Yscale; - my $c_red=($s+1)/@mark*255; - $svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"rgb($c_red,125,0)",'stroke-width',0.3); - } - - } - #=======Y axis - $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_start_x,y2=>$chr_start_y-$one_chr_heigth+15,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - #=======Y axis ===>3 xiaoge - my $s10=1; - my $e10=0; - my $chr_max=$max_all_density; - while ($chr_max>10) { - $chr_max=int($chr_max/10); - $s10=$s10*10; - $e10++; - } - $chr_max=$chr_max/2; - #print "*****$max_all_density\t$chr_max\t$s10\n"; - for (my $i=1;$i<3 ;$i++) { - my $y1=$chr_start_y-$chr_max*$s10*$Yscale*$i; - my $xiaoge_Y=$chr_max*$i; - $svg->line('x1',$chr_start_x,'y1',$y1,'x2',$chr_start_x+3,'y2',$y1,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - $svg->text('x',$chr_start_x-26,'y',$y1+4,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',8,'font-family',$attribute{font}{'font-family'},'-cdata',$xiaoge_Y."e".$e10); - } - $cc++; -} - -for (my $s=0;$s<@mark ;$s++) { - my $c_red=($s+1)/@mark*255; - print "**$c_red\n"; - $svg->line('x1',$canvas_end_x+100,'y1',$YOFFSET+$s*20+30,'x2',$canvas_end_x+130,'y2',$YOFFSET+$s*20+30,'stroke',"rgb($c_red,125,0)",'stroke-width',1); - $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+$s*20+5+30,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',$mark[$s]); -} -# -# -if (defined($opt{cen})) { - $svg->rect('x',$canvas_end_x+100,'y',$YOFFSET+@mark*20+30,'width',30,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); - $svg->text('x',$canvas_end_x+150,'y',$YOFFSET+@mark*20+30+5,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',10,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); -} - -close TXT; - -open (OUT,">$opt{o}"); -print OUT $svg->xmlify(); - -sub log2 { - my $n = shift; - return log($n)/log(2); -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -options: --g genelist --span --n sample cloumn --mark sample name --o output graph file name with html or svg extension --c cluster file input --out txt output --l length of chr --cen centromere --h help -USAGE -exit(1); -} \ No newline at end of file diff -r 9dcffd531c76 -r b6686462d0cb sRNA_rpkm_distribution_along_genome.pl --- a/sRNA_rpkm_distribution_along_genome.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,264 +0,0 @@ -#!/usr/bin/perl -w -#========================================================================================== -# Date: -# Title: -# Comment: Program to plot gene structure -# Input: 1. -# 2. -# 3. -# Output: output file of gene structure graph by html or svg formt -# Test Usage: -#======================================================================================== -#use strict; -my $version=1.00; -use SVG; -use Getopt::Long; -my %opt; -GetOptions(\%opt,"span=s","c=s","o=s","out=s","l=s","cen:s","n=s","mark=s","h"); -if (!( defined $opt{o}) || defined $opt{h}) { -&usage; -} -my $span=$opt{span}; -my $sample_cloumn=$opt{n}; -my $mark=$opt{mark}; -#===============================Define Attribute========================================== -my %attribute=( - canvas=>{ - 'width'=>1500, - 'height'=>1800 - }, - text=>{ - 'stroke'=>"#000000", - 'fill'=>"none", - 'stroke-width'=>0.5 - }, - line=>{ - 'stroke'=>"black", - 'stroke-width'=>1 - }, - csv=>{ - 'stroke'=>"red", - 'stroke-width'=>0.5 - }, - exon=>{ - 'stroke'=>"black", - 'stroke-width'=>1 - }, - intron=>{ - 'stroke'=>"black", - 'stroke-width'=>1.5 - }, - font=>{ - 'fill'=>"#000000", - 'font-size'=>12, - 'font-size2'=>10, - #'font-weight'=>'bold', - 'font-family'=>"Arial" - #'font-family'=>"ArialNarrow-bold" - }, - rect=>{ - 'fill'=>"lightgreen", - 'stroke'=>"black", - 'stroke-width'=>0.5 - }, - readwidth=>0.5 -); -#############################s#define start coordinate and scale -open(TXT,">$opt{out}"); -open(LENGTH,"$opt{l}")||die"cannot open the file $opt{l}"; -my %length; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - $temp[0]=~s/^c/C/; - $length{$temp[0]}=$temp[1]; -} -close LENGTH; -#--------------------------------------------------------------- -my %centromere; -if (defined($opt{cen})) { - open(CEN,"$opt{cen}")||die"cannot open the file $opt{cen}"; - while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - my @temp=split/\t/,$aline; - $temp[0]=~s/^c/C/; - $centromere{$temp[0]}[0]=$temp[1]; - $centromere{$temp[0]}[1]=$temp[2]; - } - close CEN; -} - -#--------------------------------------------------------------- -my $max_length=0; -foreach my $chr (keys %length) { - if ($max_length<$length{$chr}) { - $max_length=$length{$chr}; - } - print "$chr\n"; -} -#====================================cluster data======================================= -open(CLUSTER,"$opt{c}")||die"cannot open the file $opt{c}"; -my %cluster; -my %cluster_density; -#my @sample=qw(39B3 3PA3 3LC3); -my @cluster_non_add; -while (my $aline=) { - next if($aline=~/^\#/); - chomp $aline;##ID chr strand start end 19B1 - my @temp=split/\t/,$aline; - my @ID=split/\:/,$temp[0]; - my @posi=split/\-/,$ID[1]; - push @{$cluster{$ID[0]}},[$temp[0],$posi[0],$posi[1],$temp[2+$sample_cloumn]];#ID start end rpkm(19B1,1PA1,1LC1); -} -close CLUSTER; -my %max_cluster; -foreach my $chr (sort keys %cluster) { -# for (my $i=0;$i<3 ;$i++) { -# $max_cluster{$chr}[$i]=0; -# } - $max_cluster{$chr}=0 -} -foreach my $chr (sort keys %cluster) { - @{$cluster{$chr}}=sort{$a->[1] <=> $b->[1]}@{$cluster{$chr}}; - #for (my $s=0;$s<3;$s++) { - for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { - if ($cluster{$chr}[$i][3]>$max_cluster{$chr}) { - $max_cluster{$chr}=$cluster{$chr}[$i][3]; - } - } - #} - -} -#--------------------------------------------------------------------------------------- -foreach my $chr(keys %cluster) { - for(my $i=0;$i<$#{$cluster{$chr}};$i++) { - my $start=int($cluster{$chr}[$i][1]/$span); - my $end=int($cluster{$chr}[$i][2]/$span); - if ($start==$end) { - #for (my $j=0;$j<3 ;$j++) { - $cluster_density{$chr}[$start]+=$cluster{$chr}[$i][3]; - #} - - } - else{ - for (my $m=$start;$m<=$end ;$m++) { - #for (my $j=0;$j<3 ;$j++) { - $cluster_density{$chr}[$m]+=$cluster{$chr}[$i][3]; - #} - } - } - } -} -my %max_cluster_density; -foreach my $chr (sort keys %cluster) {# - #for (my $i=0;$i<3 ;$i++) { - for (my $i=0;$i<$#{$cluster{$chr}} ;$i++) { - $max_cluster_density{$chr}=0; - } - #} -} -foreach my $chr (sort keys %cluster) { - #for (my $i=0;$i<3;$i++) { - for (my $k=0;$k<$#{$cluster_density{$chr}} ;$k++) { - if (!(defined($cluster_density{$chr}[$k]))) { - $cluster_density{$chr}[$k]=0; - } - if ($cluster_density{$chr}[$k]>$max_cluster_density{$chr}) { - $max_cluster_density{$chr}=$cluster_density{$chr}[$k]; - } - print TXT "$chr\t$k\t$cluster_density{$chr}[$k]\n"; - } - #} -} -#-------------------------------------------------------------------- -my $XOFFSET=50; -my $YOFFSET=60; -#my $length=$end-$start+1; -my $Xscale=600/$max_length;#定义X轴比例尺 1:1000 x轴的坐标长度都要按照此比例尺换算 -#my $high_cov=$high_cov9B1=0.5;#定义峰图最高峰 -#my $Yscale=1/$high_cov;#定义Y轴比例尺 1:60 y轴的坐标长度都要按照此比例尺换算 -#========================================New canvas============================ -#### Starting #### -#新建画布 -my $svg=SVG->new(); -#画图起始点 -my $canvas_start_x=$XOFFSET; -my $canvas_end_x=$XOFFSET+$max_length*$Xscale;#按照比例尺画线 -my $canvas_start_y=$YOFFSET; -my $canvas_end_y=$YOFFSET; - -my $chr_width=$YOFFSET; -my $chr_start_y; -my $chr_end_y; -my $Yscale=0.01; -foreach my $chr (sort keys %length) { - my $chr_start_x=$XOFFSET; - my $chr_end_x=$XOFFSET+$length{$chr}*$Xscale; - $chr_start_y+=$chr_width; - $chr_end_y+=$chr_width; - $svg->line(x1=>$chr_start_x,y1=>$chr_start_y,x2=>$chr_end_x,y2=>$chr_end_y,'stroke',$attribute{line}{'stroke'},'stroke-width',$attribute{line}{'stroke-width'}); - $svg->text('x',$XOFFSET-40,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$chr); - $svg->text('x',$chr_end_x+20,'y',$chr_start_y,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',$length{$chr}); - - - if (defined($centromere{$chr}[0])) { - $svg->rect('x',$XOFFSET+$centromere{$chr}[0]*$Xscale,'y',$chr_start_y,'width',($centromere{$chr}[1]-$centromere{$chr}[0]+1)*$Xscale,'height',5,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); - } - for (my $i=0;$i<$#{$cluster_density{$chr}} ;$i++) { - if ($cluster_density{$chr}[$i]*$Yscale>40) { - $cluster_density{$chr}[$i]=40/$Yscale; - $svg->rect('x',$XOFFSET+$i*$span*$Xscale,'y',$chr_start_y-45,'width',$span*$Xscale,'height',5,'stroke',"green",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"green"); - } - my $cluster_density_start_x=$XOFFSET+$i*$span*$Xscale; - my $cluster_density_end_x=$XOFFSET+($i+1)*$span*$Xscale; - my $cluster_density_start_y=$chr_start_y-$cluster_density{$chr}[$i]*$Yscale; - #my $cluster_density_end_y=$chr_start_y-$cluster_density{$chr}[$i+1][0]*$Yscale; - #$svg->line('x1',$cluster_density_start_x,'y1',$cluster_density_start_y,'x2',$cluster_density_end_x,'y2',$cluster_density_end_y,'stroke',"red",'stroke-width',$attribute{csv}{'stroke-width'}); - $svg->rect('x',$cluster_density_start_x,'y',$chr_start_y-$cluster_density{$chr}[$i]*$Yscale,'width',$span*$Xscale,'height',$cluster_density{$chr}[$i]*$Yscale,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); - } - - $chr_width=50; - - #$svg->rect('x',$c_non_add_start_x,'y',$c_non_add_start_y,'width',$cluster_non_add_width,'height',$cluster_non_add_heigth,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); -} - -my $span_k=$span/1000; -$svg->text('x',200,'y',$chr_start_y+20,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"$mark sRNA rpmk \/ $span_k kb"); - -$svg->rect('x',600,'y',500,'width',10,'height',10,'stroke',"red",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"red"); -$svg->text('x',620,'y',510,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"sRNA rpkm"); - -if (defined($opt{cen})) { - $svg->rect('x',600,'y',520,'width',10,'height',10,'stroke',"blue",'stroke-width',$attribute{intron}{'stroke-width'},'fill',"blue"); - $svg->text('x',620,'y',530,'style','fill:black;text-anchor:left','stroke',$attribute{text}{'stroke'},'stroke-width',$attribute{text}{'stroke-width'},'font-size',12,'font-family',$attribute{font}{'font-family'},'-cdata',"centromere"); -} - -close TXT; -open (OUT,">$opt{o}"); -print OUT $svg->xmlify(); - -sub log2 { - my $n = shift; - return log($n)/log(2); -} - -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -options: --span --n sample cloumn --mark sample name --o output graph file name with html or svg extension --c cluster file input --out txt output --l length of chr --cen centromere --h help -USAGE -exit(1); -} \ No newline at end of file diff -r 9dcffd531c76 -r b6686462d0cb sam2Bed_bowtie.pl --- a/sam2Bed_bowtie.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ -#!/usr/bin/perl -w -#Filename: -#Author: Tian Dongmei -#Email: tiandm@big.ac.cn -#Date: 2011/11/7 -#Modified: -#Description: sam2BED -my $version=1.00; - -use strict; -use Getopt::Long; - -my %opts; -GetOptions(\%opts,"i=s","mark=s","o=s","h"); -if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments -&usage; -} - -my $filein=$opts{'i'}; -my $fileout=$opts{'o'}; -my $mark=$opts{'mark'}; -my @sample=split/\#/,$mark; -$mark=join"\t",@sample; -open OUT,">$fileout"; #output file -print OUT "#chr\tstrand\tstart\tend\t$mark\n"; - -open IN,"<$filein"; #input file -my $Tags_num=0; -my @read_num; -#print OUT "#chr\tstart\tend\tnum\t<=20\t21\t22\t23\t24\t>=25\n"; -while (my $aline=) { - chomp $aline; - next if($aline=~/^\@/); - my @tmp=split/\t/,$aline; - my $strand=$tmp[1]; - my $start=$tmp[3]+1; - my $length=length($tmp[4]); - my $end=$start+$length-1; - my $hit=$tmp[6]+1; - #======express caculate weighted=================================== - my $exp; - my @tempID=split/\:/,$tmp[0]; - my @exp=split/\_/,$tempID[1]; - pop @exp; - for (my $j=0;$j<@exp ;$j++) { - #my @tempID1=split/\=/,$tempID[$j]; - $exp[$j]=sprintf("%.2f",$exp[$j]/$hit); - $read_num[$j]+=$exp[$j]; - #print OUT "\t$exp"; - } - $exp=join "\t",@exp; - print OUT $tmp[2],"\t",$strand,"\t",$start,"\t",$end,"\t",$exp,"\n"; - $Tags_num++; - -} -print "Total Tags numer: $Tags_num\n"; -my $read_number=join "\t",@read_num; -print "Each sample numer: $read_number\n"; -close IN; -close OUT; -sub usage{ -print <<"USAGE"; -Version $version -Usage: -$0 -i -o -options: --i input file --mark sampleA sampleB sampleC..... --o output file --h help -USAGE -exit(1); -} - diff -r 9dcffd531c76 -r b6686462d0cb siRNA.pl --- a/siRNA.pl Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,521 +0,0 @@ -#!/usr/bin/perl -w -my $version=1.00; -use strict; -use warnings; -use Getopt::Long; -use Getopt::Std; -use threads; -use threads::shared; -use Parallel::ForkManager; -use lib '/leofs/biotrans/chentt/perl_module/'; -#perl ../siRNA.pl -i config -g /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome.fa -f /share_bio/hs4/disk3-4/Reference/Plants/Rice_TIGR/Reference/TIGR/version_6.1/all.dir/all.gff3 -path /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/ -o /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test -t 3 -rfam /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/Rfam.fasta -idx /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/genome -idx2 /leofs/biotrans/projects/rice/smallRNA/sRNA_package/bin/test/ref/rfam -deg deg -n 25 -nat class/nat_1 -repeat class/repeat_1 -cen centromere_TIGR.txt -format fastq -print " -##################################### -# # -# sRNA cluster # -# # -##################################### -"; -########################################################################################### -my $usage="$0 -Options: --i input file# raw data file --tag string #raw data sample name --g genome file --f gff file - --o workdir file --path script path --t int, number of threads [1] --format fastq, fq, fasta or fa --idx string, genome file index, file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null --mis int number of allowed mismatches when mapping reads to genome, default 0 --rfam string, input file# rfam database file. --idx2 string, rfam file index, file-prefix #(must be indexed by bowtie-build) The parameter - string must be the prefix of the bowtie index. For instance, if - the first indexed file is called 'h_sapiens_37_asm.1.ebwt' then - the prefix is 'h_sapiens_37_asm'.##can be null - --v int report end-to-end hits w/ <=v mismatches; ignore qualities,default 0; used in rfam alignment - --a string, ADAPTER string. default is ATCTCGTATG. --n int max hits number,default 25; used in genome alignment --d int distance of tag to merged a cluster; default 100 --p cluster method F :conventional default is F - T :NIBLES --l int the length of the upstream and downstream,default 1000;used in position annotate - --nat natural antisense transcripts file --repeat repeat information file out of Repeatmasker --deg file config of de sample --cen centromere file input --span plot span, default 50000 -"; - -my %options; -GetOptions(\%options,"i:s@","tag:s@","g=s","f=s","o=s","a:s","path:s","p=s","format=s","nat:s","repeat:s","deg:s","n:i","mis:i","rfam:s","t:i","v:i","d:i","l:i","idx:s","idx2:s","cen:s","span:s","h"); -#print help if that option is used -if($options{h}){die $usage;} - -my @filein=@{$options{'i'}}; -my @mark=@{$options{'tag'}}; - -#my $config=$options{'i'}; -my $genome_fa=$options{'g'}; -my $gff=$options{'f'}; - - -########################################################################################## -my $predir=`pwd`; -chomp $predir; -my $workdir=defined($options{'o'}) ? $options{'o'}:$predir; - -my $path=$options{'path'}; - -my $t=defined($options{'t'})? $options{'t'}:1; #threads number - -my $mis=defined $options{'mis'} ? $options{'mis'}:0; - -my $mis_rfam=defined $options{'v'} ? $options{'v'}:0; - -my $hit=defined $options{'n'}?$options{'n'}:25; - -my $distance_of_merged_tag=defined $options{'d'} ? $options{'d'}:100; - -my $up_down_dis=defined $options{'l'} ?$options{'l'}:1000; - -my $cluster_mothod=defined $options{'p'}?$options{'p'}:"F"; - -my $format=$options{'format'}; -#if ($format ne "fastq" && $format ne "fq" && $format ne "fasta" && $format ne "fa") { -# die "Parameter \"-format\" is error! Parameter is fastq, fq, fasta or fa\n"; -#} - -my $adpter="ATCTCGTATG"; #adapter -if (defined $options{'a'}) {$adpter=$options{'a'};} - - -my $phred_qv=64; -my $sample_number; -my ($dir,$dir_tmp); -################################ MAIN ################################################## -print "\ncluster program start:"; -my $time=Time(); -make_dir_tmp(); - -my @clip; -my $mark; -my $sample_mark; - -my $config=$dir."/input_config"; -open CONFIG,">$config"; - for (my $i=0;$i<@filein;$i++) { - print CONFIG $filein[$i],"\t",$mark[$i],"\n"; - } -close CONFIG; -if (@filein != @mark) { - die "Maybe config file have some wrong!!!\n"; -} -$sample_number=@mark; -$mark=join "\t",@mark; -$sample_mark=join "\#",@mark; - - -#read_config(); - -trim_adapter_and_filter(); - -my $filter_out=$dir."preProcess\/"."collapse_reads_out.fa";## raw clean data -my $data2=$filter_out; ### mirbase not mapped reads -my $data3=$dir."\/rfam_match\/rfam_not_mapped\.fa"; ### rfam not mapped reads -my $bed=$dir."cluster\/"."sample\.bed"; -my $read=$dir."cluster\/"."sample_reads\.cluster"; -my $read_txt=$dir."cluster\/"."cluster\.txt"; -my $rpkm=$dir."cluster\/"."sample_rpkm\.cluster"; -my $preprocess; -my $cluster_file; -my $annotate_dir; -my $deg_dir; -my $plot_dir; -my %id; -for (my $i=0;$i<@mark ;$i++) { - $id{$mark[$i]}=$i+4; -} - -print "\n######## tiandm test start ###########\n"; -print "\@mark: @mark\n\%id keys number:"; -print scalar keys %id; -print "\n"; -foreach my $kyess (keys %id){ - print $kyess," --> $id{$kyess}\n"; -} -print "\n######## tiandm test end ############\n"; -group_and_filter(); #collapse reads to tags - -rfam(); - -my @map_read; -my $map_tag=0; -genome(); - -bwt2bed(); - -cluster(); - -quantify(); - -phase(); - -if (defined($options{'nat'})&&defined($options{'repeat'})) { - class(); -} -else{ - get_genelist(); -} - -annotate(); - -genome_length(); - -plot(); - -my @pairdir; -if (defined($options{'deg'})) { - dec(); - infor_merge(); -} -else{infor_merge_no_dec()} -html(); -print "\ncluster program end:"; -Time(); -############################sub program################################################### -sub make_dir_tmp{ - - #make temporary directory - if(not -d "$workdir\/cluster_runs"){ - mkdir("$workdir\/cluster_runs"); - mkdir("$workdir\/cluster_runs\/ref\/"); - } - - $dir="$workdir\/cluster_runs\/"; - #print STDERR "mkdir $dir\n\n"; - return; -} - -#sub read_config{ -# open IN,"<$config"; -# while (my $aline=) { -# chomp $aline; -# my @tmp=split/\t/,$aline; -# push @filein,$tmp[0]; -# push @mark,$tmp[1]; -# } -# close IN; -# if (@filein != @mark) { -# die "Maybe config file have some wrong!!!\n"; -# } -# $sample_number=@mark; -# $mark=join "\t",@mark; -# $sample_mark=join "\#",@mark; -#} - - -sub trim_adapter_and_filter{ - my $time=time(); - $preprocess=$dir."preProcess/"; - mkdir $preprocess; - my $can_use_threads = eval 'use threads; 1'; - if ($can_use_threads) { - # Do processing using threads - my @filein1=@filein; my @mark1=@mark; - while (@filein1>0) { - my @thrs; my @res; - for (my $i=0;$i<$t ;$i++) { - last if(@filein1==0); - my $in=shift @filein1; - my $out=shift @mark1; - push @clip,$dir."preProcess\/$out\_clip\.fq"; - $thrs[$i]=threads->create(\&clips,$in,$out); - } - for (my $i=0;$i<@thrs;$i++) { - $res[$i]=$thrs[$i]->join(); - } - } - } - else { -# Do not processing using threads - for (my $i=0;$i<@filein ;$i++) { - my $in=$filein[$i]; - my $out=$mark[$i]; - push @clip,$dir."preProcess\/$out\_clip\.fq"; - &clips($in,$out); - } - } -} - -sub clips{ - my ($filein,$fileout)=@_; - my $adapter=$dir."preProcess\/$fileout\_clip\.fq"; - if($format eq "fq" || $format eq "fastq"){ - my $clip=`fastx_clipper -a $adpter -M 6 -Q $phred_qv -i $filein -o $adapter`; - } - if($format eq "fa" || $format eq "fasta"){ - my $clip=`fastx_clipper -a $adpter -M 6 -i $filein -o $adapter`; - } - #my $clean=$dir."preProcess\/$fileout\_clean.fq"; - #my $filter=`filterReadsByLength.pl -i $adapter -o $clean -min 18 -max 40 `; - return $fileout; -} - -sub group_and_filter{ - #my ($ins,$data)=@_; - my @ins=@clip; - my $str=""; - my $group_out_file=$dir."preProcess\/"."collapse_reads.fa"; - #print "$$ins[0]\t$$ins[0]\n"; - for (my $i=0;$i<@clip;$i++) { - $str .="-i $clip[$i] "; - #print "$$ins[$i]\n"; - } - my $group=`perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format`; - print "perl $path\/collapseReads2Tags.pl $str -mark seq -o $group_out_file -format $format\n\n"; - - my $l_out=$dir."preProcess\/"."collapse_reads_18-40.fa"; - my $length_f=`perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark`; - print "perl $path\/filterReadsByLength_1.pl -i $group_out_file -o $l_out -min 18 -max 40 -mark $sample_mark\n\n"; - my $cout_f=`perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark`; - print "perl $path\/filterReadsByCount.pl -i $l_out -o $filter_out -mark $sample_mark\n\n"; - my $plot_l_D=`perl $path/Length_Distibution.pl -i $dir/preProcess/reads_length_distribution_after_count_filter.txt -o $dir/preProcess/length.html `; - print "perl $path\/Length_Distibution.pl -i $dir\/preProcess\/reads_length_distribution_after_count_filter.txt -o $dir\/preProcess\/length\.html\n\n"; - return 0; -} - -sub rfam{ - if (defined $options{'idx2'}) { - system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir -index $options{idx2}"); - }else{ - system("perl $path\/rfam.pl -i $data2 -ref $options{rfam} -v $mis_rfam -p $t -o $dir"); - } - my $tag=join "\\;" ,@mark; - my $rfam_count=`perl $path\/count_rfam_express.pl -i $dir\/rfam_match\/rfam_mapped.bwt -tag $tag -o $dir\/rfam_match\/rfam_non-miRNA_annotation.txt`; - return 0; -} -sub genome{ - if(defined $options{'idx'}){ - system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir -index $options{idx}") ; - }else{ - system("perl $path\/matching.pl -i $data3 -g $genome_fa -v $mis -p $t -r $hit -o $dir ") ; - } - #=================== mapping sta =================================================== - my $map_file=$dir."genome_match\/genome_mapped\.fa"; - open (MAP,"<$map_file")||die"$!"; - print "\n#each sample mapping reads sta:\n\n"; - print "#$mark\ttotal\n"; - while (my $ID=) { - chomp $ID; - my @tmp=split/\:/,$ID; - my @exp=split/\_/,$tmp[1]; - $exp[-1] =~ s/^x//; - for (my $i=0;$i<@exp ;$i++) { - $map_read[$i]+=$exp[$i]; - } - $map_tag++; - my $seq=; - } - my $map_read=join"\t",@map_read; - print "$map_read\n\n"; - print "#total mapped tags:$map_read\n\n"; - close MAP; - return 0; -} - -sub bwt2bed{ - $cluster_file=$dir."cluster\/"; - mkdir ("$cluster_file"); - print "sam file changed to bed file\n"; - my ($file) = $dir."genome_match\/genome_mapped\.bwt"; - - my $sam2bed=`perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed `; - print "perl $path\/sam2Bed_bowtie.pl -i $file -mark $sample_mark -o $bed\n\n"; - return 0; -} - -sub cluster{ - print "tags is ready to merged clusters\n\n"; - my ($file) =$bed; - if ($cluster_mothod eq "F") { - my $cluster=`perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt`; - print "Using converntional method\n perl $path\/conventional.pl -i $file -d $distance_of_merged_tag -n $sample_number -mark $sample_mark -o $read -t $read_txt\n\n"; - } - elsif($cluster_mothod eq "T"){ - my $cluster=`perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $read_txt -k $sample_mark`; - print "Using nibls method\n perl $path\/nibls.pl -f $file -m $distance_of_merged_tag -o $read -t $dir\/cluster.txt -k $sample_mark\n\n"; - } - else{print "\-p is wrong!\n\n";} - return 0; -} - - -sub quantify{ - print "clusters is ready to quantified\n\n"; - my @depth=@map_read; - pop @depth; - my $depth=join ",",@depth; - my $quantify=`perl $path\/quantify.pl -i $read -d $depth -o $rpkm`; - print "perl $path\/quantify.pl -i $read -d $depth -o $rpkm\n\n\n"; - return 0; -} - -sub phase{ - $annotate_dir=$dir."annotate\/"; - mkdir ("$annotate_dir"); - print "clusters is to predict phase siRNA\n"; - my $phase=`perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out`; - print "perl $path\/phased_siRNA.pl -i $read_txt -o $annotate_dir\/phase.out\n\n\n"; - return 0; -} - -sub class{ - print "clusters is ready to annotate by sources\n\n"; - my $nat=$options{'nat'}; - my $repeat=$options{'repeat'}; - my $class=`perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt`; - print "perl $path\/ClassAnnotate.pl -i $rpkm -g $gff -n $nat -r $repeat -p $annotate_dir\/phase.out -o $annotate_dir\/sample_class.anno -t $annotate_dir\/nat.out -l $dir\/ref\/genelist.txt\n\n"; -} - -sub annotate{ - print "clusters is ready to annotate by gff file\n\n"; - my $file; - if (defined($options{'nat'})&&defined($options{'repeat'})) { - $file="$annotate_dir\/sample_class.anno"; - } - else{ - $file=$rpkm; - } - my $annotate=`perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno`; - print "perl $path\/Annotate.pl -i $file -g $dir\/ref\/genelist.txt -d $up_down_dis -o $annotate_dir\/sample_c_p.anno\n\n"; - return 0; -} -sub get_genelist{ - - my $get_genelist=`perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt`; - print "perl $path\/get_genelist.pl -i $gff -o $dir\/ref\/genelist.txt"; -} - -sub dec{ - print "deg reading\n\n"; - my $deg_file=$options{'deg'}; - open IN,"<$deg_file"; - my @deg; - my $s=0; - while (my $aline=) { - chomp $aline; - next if($aline=~/^\#/); - $deg[$s]=$aline; - my @ea=split/\s+/,$aline; - push @pairdir,"$ea[0]_VS_$ea[1]\/"; - #print "$deg[$s]\n"; - $s++; - } - close IN; - $deg_dir=$dir."deg\/"; - mkdir ("$deg_dir"); - my $max_process = 10; - my $pm = new Parallel::ForkManager( $max_process ); - my $number=@deg-1; - foreach(0..$number){ - $pm->start and next; - &dec_pel($deg[$_]); - $pm->finish; - } - $pm->wait_all_children; -} - -sub dec_pel{ - print "\n******************\nstart:\n"; - Time(); - my $sample=shift(@_); - my @each=split/\s+/,$sample; - print "$each[0]\t$each[1]\n"; - my $deg_sample_dir=$deg_dir."$each[0]_VS_$each[1]\/"; - mkdir ("$deg_sample_dir"); - print "read: $read\n"; - print "deg_sample_dir: $deg_sample_dir\n"; - print "$id{$each[0]}\t$each[0]\n"; - print "$id{$each[1]}\t$each[1]\n"; - my $deg=`perl $path\/DEGseq_2.pl -i $read -outdir $deg_sample_dir -column1 $id{$each[0]} -mark1 $each[0] -column2 $id{$each[1]} -mark2 $each[1]`; #-depth1 -depth2 - my $time2=time(); - print "end:\n*************************\n"; - Time(); - sleep 1; -} - -sub infor_merge{ - my ($input,$mark); - foreach (@pairdir) { - print "@pairdir\n"; - $mark.=" -mark $_ "; - $input.=" -i $dir/deg\/$_\/output_score\.txt "; - print "$input\n$mark\n"; - } - my $infor_merge=`perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result `; - print "perl $path\/SampleDEGseqMerge.pl $input $mark -f $annotate_dir\/sample_c_p.anno -n $sample_number -o $dir\/total.result\n\n"; -} - -sub infor_merge_no_dec{ - my $infor_merge_no_dec=`cp $annotate_dir\/sample_c_p.anno $dir\/total.result`; -} - -sub genome_length{ - my $length=`perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length`; - print "perl $path\/count_ref_length.pl -i $genome_fa -o $dir\/ref\/genome\.length\n\n" - -} - -sub plot{ - $plot_dir="$dir\/plot\/"; - mkdir ("$plot_dir"); - my $span=defined($options{span})?$options{span}:50000; - my $cen=""; - if (defined $options{cen}) { - $cen="-cen $options{cen}"; - } - my $plot=`perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome\.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt `; - "print perl $path/sRNA_plot.pl -c $rpkm -g $dir/ref/genelist.txt -span 50000 -mark $sample_mark -l $dir/ref/genome.length $cen -o $plot_dir/cluster.html -out $plot_dir/cluster.txt \n"; - -} - -sub html{ - my $pathfile="$dir/path.txt"; - open PA,">$pathfile"; - print PA "$config\n"; - print PA "$preprocess\n"; - print PA "$dir"."rfam_match\n"; - print PA "$dir"."genome_match\n"; - print PA "$cluster_file\n"; - print PA "$annotate_dir\n"; - print PA "$plot_dir\n"; - if (defined($deg_dir)) { - print PA "$deg_dir\n"; - } - close PA; - my $html=`perl $path\/html.pl -i $pathfile -format $format -o $dir/result.html`; -} - -sub Time{ - my $time=time(); - my ($sec,$min,$hour,$day,$month,$year) = (localtime($time))[0,1,2,3,4,5,6]; - $month++; - $year+=1900; - if (length($sec) == 1) {$sec = "0"."$sec";} - if (length($min) == 1) {$min = "0"."$min";} - if (length($hour) == 1) {$hour = "0"."$hour";} - if (length($day) == 1) {$day = "0"."$day";} - if (length($month) == 1) {$month = "0"."$month";} - print "$year-$month-$day $hour:$min:$sec\n"; - return("$year-$month-$day-$hour-$min-$sec"); -} -################################################################################# diff -r 9dcffd531c76 -r b6686462d0cb siRNA.xml --- a/siRNA.xml Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - - tool for plant siRNA analisis - - - SCRIPT_PATH - bowtie - R - degseq - fastx_toolkit - threads - Parallel-ForkManager - SVG - Boost-Graph - - - siRNA.pl - ## Change this to accommodate the number of threads you have available. - -t \${GALAXY_SLOTS:-4} - - -path \$SCRIPT_PATH - - #for $j, $s in enumerate( $series ) - ##rank_of_series=$j - -i ${s.input} - -tag ${s.tag} - #end for - - ## Do or not annotate siRNAs by function - #if $params.function_anno == "yes": - -nat $params.nat -repeat $params.repeat - #end if - - ## Do or not DEG - #if $degseq.degseq_analysis == "yes" : - -deg $degseq.deg - #end if - - -format $format -g $genome -f $gff -mis $mis -rfam $rfam -v $v -a $a -n $mapnt -d $d -p $p -l $l -cen $cen -span $span > run.log - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 9dcffd531c76 -r b6686462d0cb tool_dependencies.xml --- a/tool_dependencies.xml Wed Nov 05 21:09:35 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ - - - - - - - - - - $REPOSITORY_INSTALL_DIR - - - - - - - - - - - - - - R CMD BATCH $REPOSITORY_INSTALL_DIR/install_DEG.R - - - - - - - - http://www.cpan.org/authors/id/J/JD/JDHEDDEN/threads-1.96.tar.gz - perl Makefile.PL PREFIX=$INSTALL_DIR - make - make install - - $INSTALL_DIR/lib - - - - - - - - - - http://www.cpan.org/authors/id/S/SZ/SZABGAB/Parallel-ForkManager-1.06.tar.gz - perl Makefile.PL PREFIX=$INSTALL_DIR - make - make install - - $INSTALL_DIR/lib - - - - - - - - - http://www.cpan.org/authors/id/S/SZ/SZABGAB/SVG-2.59.tar.gz - perl Makefile.PL PREFIX=$INSTALL_DIR - make - make install - - $INSTALL_DIR/lib - - - - - - - - - http://www.cpan.org/authors/id/D/DB/DBURDICK/BoostGraph/Boost-Graph-1.4.tar.gz - perl Makefile.PL PREFIX=$INSTALL_DIR - make - make install - - $INSTALL_DIR/lib - - - - - -