comparison conventional.pl @ 0:07745c0958dd draft

Uploaded
author big-tiandm
date Thu, 18 Sep 2014 21:40:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:07745c0958dd
1 #!/usr/bin/perl -w
2 #Filename:
3 #Author: Chentt
4 #Email: chentt@big.ac.cn
5 #Date: 2014/04/09
6 #Modified:
7 #Description: islands merged of merged samples
8 my $version=1.00;
9
10 use strict;
11 use Getopt::Long;
12
13 my %opts;
14 GetOptions(\%opts,"i=s","d=i","o=s","N=i","t=s","mark=s","h");
15 if (!(defined $opts{i} and defined $opts{d} and defined $opts{N} and defined $opts{mark} and defined $opts{t} and defined $opts{o} ) || defined $opts{h}) { #necessary arguments
16 &usage;
17 }
18
19 my $filein=$opts{'i'};
20 my $fileout=$opts{'o'};
21 my $distance=$opts{'d'};
22 my $tempout=$opts{'t'};
23 my $mark=$opts{'mark'};
24 my @sample=split/\#/,$mark;
25 $mark=join"\"\t\"",@sample;
26
27 open IN,"<$filein"; #input file
28 open OUT,">$fileout"; #output file
29 print OUT "\"Chr\"\t\"MajorLength\"\t\"Percent\"\t\"$mark\"\n";
30 open TMP,">$tempout";
31 print TMP "\#Chr\tMajorLength\tPercent\tTagsNumber\tTagsInfor\n";
32 my %hash;
33 while (my $aline=<IN>) {
34 chomp $aline;
35 if($aline=~/^\#/){
36 #print OUT "$aline\n";
37 next;
38 }
39 my @tmp=split/\t/,$aline;
40 my $chr=shift @tmp;
41 #shift @tmp;
42 push @{$hash{$chr}},[@tmp];
43 }
44
45 close IN;
46
47 foreach my $key (keys %hash) {
48 my @tag=sort{$a->[1] <=> $b->[1]} @{$hash{$key}};
49 my @sample;
50 my $start=$tag[0][1];
51 my $end=$tag[0][2];
52 push @sample,[@{$tag[0]}];
53 for (my $i=1;$i<@tag-1;$i++) {
54 if ($tag[$i][1]-$end<=$distance) {
55 if ($tag[$i][2]>$end) {
56 $end=$tag[$i][2];
57 }
58 push @sample,[@{$tag[$i]}];
59 }
60 else{
61 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
62 my $cluster_exp=join"\t",@cluster_exp;
63 if ($max_length>30) {
64 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
65 $max_length="\>30";
66 }
67 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
68 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
69 $start=$tag[$i][1];
70 $end=$tag[$i][2];
71
72 @sample=();
73 push @sample,[@{$tag[$i]}];
74 }
75 }
76 if ($tag[$#tag][1]-$end<=$distance) {
77 if ($tag[$#tag][2]>$end) {
78 $end=$tag[$#tag][2];
79 }
80 push @sample,[@{$tag[$#tag]}];
81 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
82 my $cluster_exp=join"\t",@cluster_exp;
83 if ($max_length>30) {
84 $max_length="\>30";
85 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
86 }
87 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
88 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
89 }
90 else{
91 my ($max_length,$max_p,$tag,@cluster_exp)=Max_length(\@sample);
92 my $cluster_exp=join"\t",@cluster_exp;
93 if ($max_length>30) {
94 $max_length="\>30";
95 print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";
96 }
97 else{print TMP "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$tag\n";}
98 print OUT "$key\:$start\-$end\t$max_length"."nt\t$max_p\t$cluster_exp\n";
99
100 }
101 }
102 close OUT;
103 close TMP;
104 sub Max_length{
105 my @exp=@{$_[0]};
106 my %sample_length;
107 my $total_exp;
108 my @each;
109 my @tag;
110 for (my $i=0;$i<=$#exp ;$i++) {
111 my $length=$exp[$i][2]-$exp[$i][1]+1;
112 #if ($length>30) {
113 # $length=40;
114 #}
115 my $exp=0;
116 foreach (1..$opts{'N'}) {
117 $exp+=$exp[$i][$_+2];
118 $each[$_-1]+=$exp[$i][$_+2];
119 }
120 $sample_length{$length}+=$exp;
121 $total_exp+=$exp;
122 push @tag,($exp[$i][1].",".$exp[$i][2].",".$exp[$i][0].",".$exp);
123 }
124 my $max=0;
125 my $max_key;
126 foreach my $key (sort keys %sample_length) {
127 my $p=$sample_length{$key}/$total_exp;
128 if ($p>$max) {
129 $max=$p;
130 $max_key=$key;
131 }
132 $sample_length{$key}=sprintf("%.2f",$p);
133 }
134 my $tag_n=@tag;
135 my $tag=join";",@tag;
136 $tag=$tag_n."\t".$tag;
137 return($max_key,$sample_length{$max_key},$tag,@each);
138 }
139
140 sub usage{
141 print <<"USAGE";
142 Version $version
143 Usage:
144 $0 -i -o -d -N -t -mark
145 options:
146 -i input file
147 -d distance of two islands
148 -mark sample name;
149 -o output file
150 -N sample number
151 -t temp output file
152 -h help
153 USAGE
154 exit(1);
155 }
156