annotate fasta-stats.pl @ 1:16f1f3e2de42 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
author iuc
date Wed, 21 Apr 2021 09:10:46 +0000
parents 9c620a950d3a
children cd0874854f51
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
1 #!/usr/bin/env perl
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
2
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
3 # fasta-stats
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
4 # written by torsten.seemann@monash.edu
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
5 # oct 2012
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
6
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
7 use strict;
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
8 use warnings;
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
9 use List::Util qw(sum min max);
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
10
1
16f1f3e2de42 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents: 0
diff changeset
11
16f1f3e2de42 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents: 0
diff changeset
#Parameters
# First argument: path of the FASTA file to summarise.
# Optional second argument: genome size. Supplying it switches on the NG50
# calculation (the value is scaled by 1,000,000 before use, so it is
# presumably given in megabases -- confirm against the tool wrapper).
my $file = shift;
my ($calc_ng50, $genome_size) = (0, 0);
if (@ARGV) {
    $calc_ng50   = 1;
    $genome_size = $ARGV[0];
}
16f1f3e2de42 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents: 0
diff changeset
20
0
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
# stat storage

my $n   = 0;    # number of FASTA header lines seen so far
my $seq = '';   # sequence text accumulated for the record being read
my %stat;       # statistic name => value, printed as TSV at the end
my @len;        # one length per record, appended by process()

# MAIN LOOP collecting sequences

# A three-argument open with an undefined path silently opens an empty
# anonymous temporary file, so a missing argument has to be rejected first.
defined $file
    or die "Couldn't open input file for reading: no file name given\n";

# Three-argument open with a lexical handle: the path can no longer be
# interpreted as a mode or a pipe. die() now receives a plain string; the
# former `die{ ... }` threw an anonymous hash, so the message never appeared.
open(my $in, '<', $file)
    or die "Couldn't open $file for reading\n$!";

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^\s*>/) {
        # A header closes the previous record (if there was one) and
        # starts a new, empty one.
        process($seq) if $n;
        $n++;
        $seq = '';
    }
    else {
        $seq .= $line;
    }
}

close($in);

# The last record has no following header to trigger its processing.
process($seq) if $n;
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
46
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
# Sort the lengths numerically, shortest first, so that the minimum, maximum
# and median can be read off by position.
# (should use hash here for efficiency with huge no of short reads?)
@len = sort { $a <=> $b } @len;

# compute more stats
$stat{'num_seq'} = @len;

# Everything below needs at least one sequence (it divides by the count).
if ($stat{'num_seq'}) {
    my $total_bp = sum(@len);

    $stat{'num_bp'}  = $total_bp;
    $stat{'len_min'} = $len[0];
    $stat{'len_max'} = $len[$#len];

    # Element at index int(count / 2): the upper middle value when the
    # number of sequences is even.
    $stat{'len_median'} = $len[ int($stat{'num_seq'} / 2) ];

    # Mean length, truncated to a whole number.
    $stat{'len_mean'} = int($total_bp / $stat{'num_seq'});

    # N50: the threshold is half of the total base count.
    $stat{'len_N50'} = calc_x50(@len, int(0.5 * $total_bp));

    # NG50: same calculation, but the threshold is half of the genome size
    # given on the command line, scaled by 1,000,000.
    if ($calc_ng50) {
        $stat{'len_NG50'} = calc_x50(@len, int(0.5 * $genome_size * 1000000));
    }
}
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
73
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
#calculate GC content
# The num_A/num_C/num_G/num_T entries only exist once process() has run, so
# an input without sequences leaves them undefined. Default each count to 0
# and skip the division when there are no A/C/G/T bases at all; the original
# expression died with "Illegal division by zero" in that case.
{
    my %base_count = map { $_ => ($stat{"num_$_"} || 0) } qw(A C G T);
    my $gc_bases   = $base_count{'G'} + $base_count{'C'};
    my $acgt_bases = $gc_bases + $base_count{'A'} + $base_count{'T'};

    $stat{'num_bp_not_N'} = $acgt_bases;

    # Percentage of G and C among the A/C/G/T bases; 0 when there are none.
    $stat{'GC_content'} = $acgt_bases ? $gc_bases / $acgt_bases * 100 : 0;
}
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
77
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
# print stats as .tsv
# One "name<TAB>value" line per statistic, in sorted key order. Any key
# containing GC_content is printed with one decimal place; every other value
# is printed as-is.
foreach my $key (sort keys %stat) {
    my $format = ($key =~ m/GC_content/) ? "%s\t%0.1f\n" : "%s\t%s\n";
    printf($format, $key, $stat{$key});
}
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
87
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
# run for each sequence
#
# process($sequence)
#
# Takes the concatenated sequence text of one FASTA record, adds its
# case-insensitive A, G, T, C and N counts to $stat{num_A} .. $stat{num_N},
# and appends the record's length to @len.
sub process {
    my ($s) = @_;

    # chomp only strips "\n", so lines from CRLF files still end in "\r".
    # Remove all whitespace so it is not counted as part of the sequence.
    $s =~ s/\s+//g;

    # base composition
    for my $x (qw(A G T C N)) {
        # A global match in list context returns every hit; assigning it
        # through the empty list yields the number of hits without
        # rewriting the string the way the old s/$x/$x/gi did.
        my $count = () = $s =~ /$x/gi;
        $stat{"num_$x"} += $count;
    }

    # keep list of all lengths encountered
    push @len, length($s);
}
9c620a950d3a planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit d6a78405947a91659a4168ddb2f1534327f044cb
iuc
parents:
diff changeset
100
1
16f1f3e2de42 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents: 0
diff changeset
# N50/NG50 calculation sub
#
# calc_x50(@lengths, $thresh)
# calc_x50(\@lengths, $thresh)
#
# Walks the lengths from longest to shortest, adding them up, and returns
# the length at which the running total first reaches $thresh. Returns 0
# when the total of all lengths never reaches the threshold (or when no
# lengths are given).
#
# The threshold is always the LAST argument. Callers pass the array itself,
# which Perl flattens into the argument list, so the lengths cannot be
# fetched with a single shift: that picked up only the first length and
# then used the second length as the threshold. An array reference is
# accepted as well.
sub calc_x50 {
    my @args   = @_;
    my $thresh = pop @args;
    my @x = (@args == 1 && ref $args[0] eq 'ARRAY') ? @{ $args[0] } : @args;

    # Sort a private copy, longest first, so the result does not depend on
    # the order in which the caller keeps its lengths.
    my $cum = 0;
    for my $length (sort { $b <=> $a } @x) {
        $cum += $length;
        return $length if $cum >= $thresh;
    }
    return 0;
}
16f1f3e2de42 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit 02d0ae7ac02425ef454d2e42a0513887596a3b4d"
iuc
parents: 0
diff changeset
115