Mercurial > repos > public-health-bioinformatics > adjust_bracken_for_unclassified_reads
changeset 2:87459bd1615a draft
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 4ebe8216c423e2d66be92247f273df21cb5852f1
author | public-health-bioinformatics |
---|---|
date | Thu, 27 Oct 2022 00:37:27 +0000 |
parents | 3cde438eb222 |
children | 899a650587ed |
files | adjust_bracken_for_unclassified_reads.py adjust_bracken_for_unclassified_reads.xml test-data/input/zero_unclassified_bracken_abundances.tsv test-data/input/zero_unclassified_kraken2.txt test-data/output/SRR17907745_bracken_abundances_adjusted.tsv test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv |
diffstat | 6 files changed, 41 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/adjust_bracken_for_unclassified_reads.py Thu Mar 10 21:39:43 2022 +0000 +++ b/adjust_bracken_for_unclassified_reads.py Thu Oct 27 00:37:27 2022 +0000 @@ -43,23 +43,14 @@ kraken_report = parse_kraken_report(args.kraken_report) bracken_abundances = parse_bracken_abundances(args.bracken_abundances) - kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level'] + try: + kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level'] + except IndexError as e: + kraken_report_unclassified_seqs = 0 kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total'] total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs - percent_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) - - bracken_unclassified_entry = { - 'name': 'unclassified', - 'taxonomy_id': 0, - 'taxonomy_lvl': 'U', - 'kraken_assigned_seqs': kraken_report_unclassified_seqs, - 'bracken_assigned_seqs': kraken_report_unclassified_seqs, - 'kraken_fraction_total_seqs': percent_unclassified, - 'bracken_fraction_total_seqs': 0.0, - } - - bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + fraction_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) output_fieldnames = [ 'name', @@ -82,7 +73,20 @@ bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs) b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs) - for b in sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True): + bracken_unclassified_entry = { + 'name': 'unclassified', + 'taxonomy_id': 0, + 'taxonomy_lvl': 'U', + 'kraken_assigned_seqs': kraken_report_unclassified_seqs, + 'bracken_assigned_seqs': kraken_report_unclassified_seqs, + 'total_seqs': total_seqs, + 
'kraken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + 'bracken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + } + + bracken_abundances = sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True) + bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + for b in bracken_abundances: writer.writerow(b)
--- a/adjust_bracken_for_unclassified_reads.xml Thu Mar 10 21:39:43 2022 +0000
+++ b/adjust_bracken_for_unclassified_reads.xml Thu Oct 27 00:37:27 2022 +0000
@@ -1,5 +1,5 @@
-<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.1.0">
- <description>Adjust bracken report to account for unclassified reads.</description>
+<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.2.0+galaxy0">
+ <description>Adjust bracken report to account for unclassified reads</description>
 <requirements>
 </requirements>
 <command detect_errors="exit_code"><![CDATA[
@@ -26,6 +26,11 @@
 <param name="bracken_abundances" value="input/SRR17907745_bracken_abundances.tsv"/>
 <output name="adjusted_bracken_report" file="output/SRR17907745_bracken_abundances_adjusted.tsv" ftype="tabular"/>
 </test>
+ <test>
+ <param name="kraken_report" value="input/zero_unclassified_kraken2.txt"/>
+ <param name="bracken_abundances" value="input/zero_unclassified_bracken_abundances.tsv"/>
+ <output name="adjusted_bracken_report" file="output/zero_unclassified_bracken_abundances_adjusted.tsv" ftype="tabular"/>
+ </test>
 </tests>
 <help><![CDATA[
 ]]></help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/zero_unclassified_bracken_abundances.tsv Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,2 @@
+name taxonomy_id taxonomy_lvl kraken_assigned_reads added_reads new_est_reads fraction_total_reads
+Klebsiella pneumoniae 573 S 25 70 95 0.95
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input/zero_unclassified_kraken2.txt Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,9 @@
+ 100.0 100 5 R 1 root
+ 95.0 95 10 R1 131567 cellular organisms
+ 85.0 85 10 D 2 Bacteria
+ 75.0 75 20 P 1224 Proteobacteria
+ 65.0 65 10 C 1236 Gammaproteobacteria
+ 55.0 55 10 O 91347 Enterobacterales
+ 45.0 45 10 F 543 Enterobacteriaceae
+ 35.0 35 10 G 570 Klebsiella
+ 25.0 25 10 S 573 Klebsiella pneumoniae
--- a/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Mar 10 21:39:43 2022 +0000
+++ b/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Oct 27 00:37:27 2022 +0000
@@ -1,9 +1,9 @@
 name taxonomy_id taxonomy_lvl kraken_assigned_seqs bracken_assigned_seqs total_seqs kraken_fraction_total_seqs bracken_fraction_total_seqs
+unclassified 0 U 110613 110613 2570868 0.043026 0.043026
 Klebsiella quasipneumoniae 1463165 S 484958 1017029 2570868 0.188636 0.395598
 Escherichia coli 562 S 181539 751229 2570868 0.070614 0.292208
 Klebsiella pneumoniae 573 S 94362 315713 2570868 0.036704 0.122804
 Citrobacter freundii 546 S 116592 132960 2570868 0.045351 0.051718
-unclassified 0 U 110613 110613 2570868 0.043026 0.043026
 Enterobacter hormaechei 158836 S 74706 85010 2570868 0.029059 0.033067
 Enterobacter cloacae 550 S 73130 79124 2570868 0.028446 0.030777
 Klebsiella variicola 244366 S 4879 10752 2570868 0.001898 0.004182
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output/zero_unclassified_bracken_abundances_adjusted.tsv Thu Oct 27 00:37:27 2022 +0000
@@ -0,0 +1,3 @@
+name taxonomy_id taxonomy_lvl kraken_assigned_seqs bracken_assigned_seqs total_seqs kraken_fraction_total_seqs bracken_fraction_total_seqs
+unclassified 0 U 0 0 100 0.000000 0.000000
+Klebsiella pneumoniae 573 S 25 95 100 0.250000 0.950000