# HG changeset patch # User iuc # Date 1619431303 0 # Node ID cd0874854f51db59784f4310656b8221542ed06f # Parent 16f1f3e2de426a34a75654a3697b309855548987 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fasta_stats/ commit adc5e3616c1849551c9a712b651b0d1c6b0e88f1" diff -r 16f1f3e2de42 -r cd0874854f51 fasta-stats.pl --- a/fasta-stats.pl Wed Apr 21 09:10:46 2021 +0000 +++ b/fasta-stats.pl Mon Apr 26 10:01:43 2021 +0000 @@ -47,7 +47,7 @@ # sort length array # (should use hash here for efficiency with huge no of short reads?) -@len = sort { $a <=> $b } @len; +@len = sort { $b <=> $a } @len; # compute more stats @@ -62,12 +62,12 @@ # calculate n50 my $thresh = int 0.5 * $stat{'num_bp'}; - $stat{'len_N50'} = &calc_x50(@len, $thresh); + ($stat{'len_N50'}, $stat{'L50'}) = &calc_x50(\@len, $thresh); #calculate NG50 if ($calc_ng50) { - my $thresh = int 0.5 * $genome_size * 1000000; - $stat{'len_NG50'} = &calc_x50(@len, $thresh); + my $thresh = int 0.5 * $genome_size; + ($stat{'len_NG50'}, $stat{'LG50'}) = &calc_x50(\@len, $thresh); } } @@ -101,15 +101,16 @@ # N50/NG50 calculation sub sub calc_x50{ - my @x = shift; + my $ref = shift; + my @x = @$ref; my $thresh = shift; my $cum=0; for my $i (0 .. $#x) { $cum += $x[$i]; if ($cum >= $thresh) { - return $x[$i]; + return $x[$i], $i+1; } } - return 0; + return (0,0); } diff -r 16f1f3e2de42 -r cd0874854f51 fasta-stats.xml --- a/fasta-stats.xml Wed Apr 21 09:10:46 2021 +0000 +++ b/fasta-stats.xml Mon Apr 26 10:01:43 2021 +0000 @@ -14,7 +14,7 @@ - + @@ -25,8 +25,8 @@ - - + + diff -r 16f1f3e2de42 -r cd0874854f51 test-data/ng50_input.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ng50_input.fasta Mon Apr 26 10:01:43 2021 +0000 @@ -0,0 +1,88 @@ +>1 +ACTTGATCCTGCTCCCTCGGTGTCTGCATTGACTCCTCATGCTGGGACTG +GACCCGTCAACCCCCCTGCTCGCTGCTCACGTACCTTCATCACTTTTAGT +GATGATGCAACTTTCGAGGAATGGTTCCCCCAAGGGCGGCCCCCAAAAGT +CCCTGTTCGTGAGGTCTGTCCAGTGACCCATCGTCCAGCCCTATACCGGG +ACCCTGTTACAGACATACCCTATGCCACTGCTCGAGCCTTCAAGATCATT +CGTGAGGCTTACAAGAAGTACATTACTGCCCATGGACTGCCGCCCACTGC +CTCAGCCCTGGGCCCCGGCCCGCCACCTCCTGAGCCCCTCCCTGGCTCTG +GGCCCCGAGCCTTGCGCCAGAAAATTGTCATTAAATGA +>2 +ATGGCGTCGGCCTCCTCCGGGCCGTCGTCTTCGGTCGGTTTTTCATCCTT +TGATCCCGCGGTCCCTTCCTGTACCTTGTCCTCAG +>3 +GTTCTCAGCTTCCTTGCTTCCATGGCTCCAGCACCATTCGAAACCTCAAA +GAGAGGTTCCACATGAGCATGACTGAGGAGCAGCTGCAGCTGCTGGTGGA +GCAGATGGTGGATGGCAGTATGCGGTCTATCACCACCAAACTCTATGACG +GCTTCCAGTACCTCACCAACGGCATCATGTGA +>4 +ATGGAAGCGTTTTTGGGGTCGCGGTCCGGACTTTGGGCGGGGGGTCCGGC +CCCAGGACAGTTTTACCGCATTCCGTCCACTCCCGATTCCTTCATGGATC +CGGCGTCTGCACTTTACAGAGGTCCAATCACGCGGACCCA +>5 +TCTTTTCCTTCTCTACCATTTTCAACAAAGCAGGGGAAATAACTCAGTCT +CAGAAGACAGGAAACATCAACAAGTTGTGATGCCCTTTTCTTCCAATACT +ATTGAGGCTCACAAGTCAGCTCATGTAGACGGATCACTTAAGAGCAACAA +ACTGAAGTCTGCAAGAAAATTCACATTTCTATCTGATGAGGATGACTTAA +GTGCCCATAATCCCCTTTATAAGGAAAACATAAGTCAAGTATCAACAAAT +TCAGACATTTCACAGAGAACAGATTTTGTAGACCCATTTTCACCCAAAAT +ACAAGCCAAGAGTAAGTCTCTGAGGGGCCCAAGAGAAAAGATTCAGAGGC +TGTGGAGTCAGTCAGTCAGCTTACCCAGGAGGCTGATGAGGAAAGTTCCA +AATAGACCAGAGATCATAGATCTGCAGCAGTGGCAAGGCACCAGGCAGAA +AGCTGAAAATGAAAACACTGGAATCTGTACAAACAAAAGAGGTAGCAGCA +ATCCATTGCTTACAACTGAAGAGGCAAATTTGACAGAGAAAGAGGAAATA +AGGCAAGGTGAAACACTGATGATAGAAGGAACAGAACAGTTGAAATCTCT +CTCTTCAGACTCTTCATTTTGCTTTCCCAGGCCTCACTTCTCATTCTCCA +CTTTGCCAACTGTTTCAAGAACTGTGGAACTCAAATCAGAACCTAATGTC +ATCAGTTCTCCTGCTGAGTGTTCCTTGGAACTTTCTCCTTCAAGGCCTTG +TGTTTTACATTCTTCACTCTCTAGGAGAGAGACACCTATTTGTATGTTAC +CTATTGAAACCGAAAGAAATATTTTTGAAAATTTTGCCCATCCACCAAAC +ATCTCTCCTTCTGCCTGNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNACATTTTTTCCACTTTCCGTTTCAACGTCTGGTCCCCC +AACAccacctcttctacctccatttccaactcctcttcctccaccacctc +cttctattccttgccctccacctccttcAGCTTCATTTCTGTCCACAGAG +TGTGTCTGTATAACAGGTGTTAAATGCACGACCAACTTGATGCCTGCCGA +GAAAATTAAGTCCTCTATGACACAGCTATCAACAACGACAGTGTGTAAAA +CAGACCCTCAGAGAGAACCAAAAGGCATCCTCAGACACGTTAAAAACTTA +GCAGAACTTGAAAAATCAGTAGCTAACATGTACAGTCAAATAGAAAAAAA +CTATCTACGCACAAATGTTTCAGAACTTCAAACTATGTGCCCTTCAGAAG +TAACAAATATGGAAATCACATCTGAACAAAACAAGGGGAGTTTGAACAAT +ATTGTCGAGGGAACTGAAAAACAATCTCACAGTCAATCTACTTCACTGTA +A +>6 +ATCCAATGGATTTGAACAGAAGCGCTTTGCCAGGCTTGCCAGCAAGAAGG +CAGTGGAGGAACTTGCCTACAAATGGAGTGTTGAGGATATGTAA +>7 +ATGCAGCCCCGGGTACTCCTTGTTGTTGCCCTCCTGGCGCTCCTGGCCTC +TGCCC +>8 +CCTAAAGCTCCTTGACAACTGGGACAGCGTGACCTCCACCTTCAGCAAGC +TGCGCGAACAGCTCGGCCCTGTGACCCAGGAGTTCTGGGATAACCTGGAA +AAGGAGACAGAGGGCCTGAGGCAGGAGATGAGCAAGGATCTGGAGGAGGT +GAAGGCCAAGGTGCAGCCCTACCTGGACGACTTCCAGAAGAAGTGGCAGG +AGGAGATGGAGCTCTACCGCCAGAAGGTGGAGCCGCTGCGCGCAGAGCTC +CAAGAGGGCGCGCGCCAGAAGCTGCACGAGCTGCAAGAGAAGCTGAGCCC +ACTGGGCGAGGAGATGCGCGACCGCGCGCGCGCCCATGTGGACGCGCTGC +GCACGCATCTGGCCCCCTACAGCGACGAGCTGCGCCAGCGCTTGGCCGCG +CGCCTTGAGGCTCTCAAGGAGAACGGCGGCGCCAGACTGGCCGAGTACCA +CGCCAAGGCCACCGAGCATCTGAGCACGCTCAGCGAGAAGGCCAAGCCCG +CGCTCGAGGACCTCCGCCAAGGCCTGCTGCCCGTGCTGGAGAGCTTCAAG +GTCAGCTTCCTGAGCGCTCTCGAGGAGTACACTAAGAAGCTCAACACCCA +GTGA +>9 +ATGCTCCACCTGCATGGCTGGCAAACCATG +>10 +GAGCTTTCTTCCTCTATGCTGGATTTGCTGCTGTGGGACTCCTTTTCATC +TATGGCTGTCTTCCTGAGACCAAAGGCAAAAAATTAGAGGAAATTGAATC +ACTCTTTGACAACAGGCTATGTACATGTGGCACTTCAGATTCTGATGAAG +GGAGATATATTGAATATATTCGGGTAAAGGGAAGTAACTATCATCTTTCT +GACAATGATGCTTCTGATGTGGAATAA +>11 +ATGAACTCACCAGAGGCGAGGCTCTGCGTTGCTCAATGCAGAGACTCTTA +CCCAGGGTGTCAGCCTCTGAAAGATACACGTGCCTGGGCCTCTTCCCTGA +AGATGGACCCGGCAGGTCTGGAGGGAGGCCCCCGTGATGAATCCCGTGAT +GAGCCGCCGATCCGAGCTCAGGCTGCGTCATGGGACCAGCCACAAGGTTG +CCTGACCTATAAAGGTCGCAGGAGTGCCTCAGGGACACAGAAGCAGTTAC +AGCTGCCAG \ No newline at end of file diff -r 16f1f3e2de42 -r cd0874854f51 test-data/ng50_out.txt --- a/test-data/ng50_out.txt Wed Apr 21 09:10:46 2021 +0000 +++ b/test-data/ng50_out.txt Mon Apr 26 10:01:43 2021 +0000 @@ -1,15 +1,17 @@ -GC_content 52.0 -len_N50 194780 -len_NG50 0 -len_max 194780 -len_mean 194780 -len_median 194780 -len_min 194780 -num_A 46297 -num_C 50626 -num_G 50678 -num_N 0 -num_T 47179 -num_bp 194780 -num_bp_not_N 194780 -num_seq 1 +GC_content 51.1 +L50 2 +LG50 2 +len_N50 604 +len_NG50 604 +len_max 30 +len_mean 324 +len_median 182 +len_min 1501 +num_A 895 +num_C 940 +num_G 807 +num_N 145 +num_T 778 +num_bp 3565 +num_bp_not_N 3420 +num_seq 11 diff -r 16f1f3e2de42 -r cd0874854f51 test-data/test_out.txt --- a/test-data/test_out.txt Wed Apr 21 09:10:46 2021 +0000 +++ b/test-data/test_out.txt Mon Apr 26 10:01:43 2021 +0000 @@ -1,4 +1,5 @@ GC_content 52.0 +L50 1 len_N50 194780 len_max 194780 len_mean 194780