Mercurial > repos > bigrna > gpsrna
diff conventional.pl @ 0:87fe81de0931 draft default tip
Uploaded
author | bigrna |
---|---|
date | Sun, 04 Jan 2015 02:47:25 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl -w
#Filename: conventional.pl
#Author: Chentt
#Email: chentt@big.ac.cn
#Date: 2014/04/09
#Modified:
#Description: islands merged of merged samples
#
# Input (-i): tab-separated lines; lines starting with '#' are skipped.
#   column 1      chromosome
#   column 2      tag identifier (echoed into the temp file)
#   column 3, 4   tag start and end
#   column 5...   one expression value per sample (-N of them are read)
#
# Tags on the same chromosome are sorted by start and merged into one cluster
# while the gap between a tag's start and the running cluster end is <= -d.
#
# Output (-o): one line per cluster with the major tag length (capped to
#   ">30"), that length's share of the cluster expression, and the
#   per-sample expression sums.
# Temp output (-t): one line per cluster with the uncapped major length, the
#   number of tags and a "start,end,id,expression" record per tag.

use strict;
use warnings;
use Getopt::Long;

# Kept as a string so the usage text shows "1.00" instead of "1".
my $version = '1.00';

my %opts;
GetOptions(\%opts, "i=s", "d=i", "o=s", "N=i", "t=s", "mark=s", "h")
    or usage();

# All of these are mandatory; -h always shows the usage text.
if (defined($opts{h}) || grep { !defined $opts{$_} } qw(i d N mark t o)) {
    usage();
}

my $filein   = $opts{'i'};
my $fileout  = $opts{'o'};
my $distance = $opts{'d'};
my $tempout  = $opts{'t'};

# Sample names arrive as one '#'-separated string and become quoted,
# tab-separated column titles.
my @sample_names = split /\#/, $opts{'mark'};
my $mark = join "\"\t\"", @sample_names;

open my $in_fh, '<', $filein
    or die "Cannot open input file '$filein': $!\n";
open my $out_fh, '>', $fileout
    or die "Cannot open output file '$fileout': $!\n";
open my $tmp_fh, '>', $tempout
    or die "Cannot open temp output file '$tempout': $!\n";

print {$out_fh} "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
print {$tmp_fh} "#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";

# chromosome => list of rows; each row is [id, start, end, exp1, exp2, ...]
my %tags_of;
while (my $line = <$in_fh>) {
    chomp $line;
    next if $line =~ /^\#/;     # header / comment line
    next if $line !~ /\S/;      # blank line would create an undef chromosome
    my ($chr, @fields) = split /\t/, $line;
    push @{ $tags_of{$chr} }, \@fields;
}
close $in_fh;

# Chromosomes are processed in sorted order so the output is reproducible.
for my $chr (sort keys %tags_of) {
    my @tags = sort { $a->[1] <=> $b->[1] } @{ $tags_of{$chr} };

    my ($start, $end) = @{ $tags[0] }[1, 2];
    my @cluster = ($tags[0]);

    # Every remaining tag, including the last one, goes through the same
    # merge-or-flush step. Handling the last tag separately used to drop an
    # isolated final tag and to count a lone tag twice.
    for my $tag (@tags[1 .. $#tags]) {
        if ($tag->[1] - $end <= $distance) {
            $end = $tag->[2] if $tag->[2] > $end;
            push @cluster, $tag;
            next;
        }
        write_cluster($out_fh, $tmp_fh, $chr, $start, $end, \@cluster);
        ($start, $end) = @{$tag}[1, 2];
        @cluster = ($tag);
    }
    write_cluster($out_fh, $tmp_fh, $chr, $start, $end, \@cluster);
}

close $out_fh or die "Cannot close output file '$fileout': $!\n";
close $tmp_fh or die "Cannot close temp output file '$tempout': $!\n";

# write_cluster($out_fh, $tmp_fh, $chr, $start, $end, \@cluster)
# Summarises one merged cluster with Max_length and prints one line to each
# file. The temp file always gets the real major length; only the main
# output caps lengths above 30 to ">30".
sub write_cluster {
    my ($out, $tmp, $chr, $start, $end, $cluster) = @_;

    my ($max_length, $max_p, $tag_info, @cluster_exp) = Max_length($cluster);
    my $cluster_exp = join "\t", @cluster_exp;
    my $region      = "${chr}:${start}-${end}";

    print {$tmp} "${region}\t${max_length}nt\t${max_p}\t${tag_info}\n";

    my $shown_length = $max_length > 30 ? '>30' : $max_length;
    print {$out} "${region}\t${shown_length}nt\t${max_p}\t${cluster_exp}\n";
    return;
}

# Max_length(\@tags [, $sample_count])
# Summarises the tags of one cluster.
#   \@tags         rows of [id, start, end, exp1, exp2, ...]
#   $sample_count  number of expression columns to read; defaults to -N
# Returns a list:
#   - the tag length (end - start + 1) carrying the largest share of the
#     cluster's total expression; on a tie the shortest length wins
#   - that share, formatted with two decimals
#   - "<tag count>\t<start,end,id,expression>;..." describing every tag
#   - the per-sample expression sums, one element per sample
sub Max_length {
    my ($tags, $sample_count) = @_;
    $sample_count = $opts{'N'} unless defined $sample_count;

    my %exp_of_length;                      # tag length => summed expression
    my $total_exp = 0;
    my @each      = (0) x $sample_count;    # per-sample sums
    my @tag_info;

    for my $tag (@{$tags}) {
        my ($id, $start, $end) = @{$tag}[0, 1, 2];
        my $length = $end - $start + 1;

        my $exp = 0;
        for my $sample (0 .. $sample_count - 1) {
            my $value = $tag->[ $sample + 3 ];  # expression columns follow id, start, end
            $exp           += $value;
            $each[$sample] += $value;
        }

        $exp_of_length{$length} += $exp;
        $total_exp              += $exp;
        push @tag_info, join(",", $start, $end, $id, $exp);
    }

    my $max = 0;
    my $max_key;
    my %percent_of;
    # Numeric order, so that ties are resolved by length rather than by the
    # string form of the length.
    for my $length (sort { $a <=> $b } keys %exp_of_length) {
        # A cluster whose expression values are all 0 has no meaningful
        # share; report 0 instead of dying on a division by zero.
        my $p = $total_exp ? $exp_of_length{$length} / $total_exp : 0;
        if (!defined $max_key || $p > $max) {
            $max     = $p;
            $max_key = $length;
        }
        $percent_of{$length} = sprintf("%.2f", $p);
    }

    my $tag = scalar(@tag_info) . "\t" . join(";", @tag_info);
    return ($max_key, $percent_of{$max_key}, $tag, @each);
}

# Prints the usage text and exits with status 1.
sub usage {
    print <<"USAGE";
Version $version
Usage:
$0 -i -o -d -N -t -mark
options:
-i input file
-d distance of two islands
-mark sample name;
-o output file
-N sample number
-t temp output file
-h help
USAGE
    exit(1);
}