annotate adjust_bracken_for_unclassified_reads.py @ 2:87459bd1615a draft

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 4ebe8216c423e2d66be92247f273df21cb5852f1
author public-health-bioinformatics
date Thu, 27 Oct 2022 00:37:27 +0000
parents 3ab9d37e547e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
1 #!/usr/bin/env python
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
2
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
3 import argparse
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
4 import csv
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
5 import json
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
6 import sys
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
7
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def parse_bracken_abundances(bracken_abundances_path):
    """Parse a Bracken abundance table into a list of dicts.

    The file is read as tab-delimited text with a header row.  The columns
    ``name``, ``taxonomy_id``, ``taxonomy_lvl``, ``kraken_assigned_reads``,
    ``new_est_reads`` and ``fraction_total_reads`` must be present; any
    other columns are ignored.

    Args:
        bracken_abundances_path: Path to the Bracken abundance file.

    Returns:
        A list with one dict per data row, in file order, with the keys
        ``name``, ``taxonomy_id``, ``taxonomy_lvl`` (all str),
        ``kraken_assigned_seqs`` and ``bracken_assigned_seqs`` (int) and
        ``bracken_fraction_total_seqs`` (float).

    Raises:
        KeyError: If a required column is missing from the header.
        ValueError: If a numeric column cannot be converted.
    """
    bracken_abundances = []
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for file objects passed to a reader.
    with open(bracken_abundances_path, 'r', newline='') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            # Note the rename from Bracken's "*_reads" columns to "*_seqs" keys.
            bracken_abundances.append({
                'name': row['name'],
                'taxonomy_id': row['taxonomy_id'],
                'taxonomy_lvl': row['taxonomy_lvl'],
                'kraken_assigned_seqs': int(row['kraken_assigned_reads']),
                'bracken_assigned_seqs': int(row['new_est_reads']),
                'bracken_fraction_total_seqs': float(row['fraction_total_reads']),
            })

    return bracken_abundances
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
23
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
24
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def parse_kraken_report(kraken_report_path):
    """Parse a six-column Kraken report into a list of dicts.

    Each non-blank line is split on whitespace into at most six fields, so
    the final field (the taxon name) keeps its internal spaces while its
    leading indentation is dropped.

    Args:
        kraken_report_path: Path to the Kraken report file.

    Returns:
        A list with one dict per non-blank line, in file order, with the
        keys ``percentage`` (float), ``seqs_total`` and ``seqs_this_level``
        (int), ``taxonomic_level``, ``ncbi_taxid`` and ``taxon_name`` (str).

    Raises:
        ValueError: If a non-blank line does not have six fields, or a
            numeric field cannot be converted.
    """
    kraken_report = []
    with open(kraken_report_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.strip().split(None, 5)
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if len(fields) != 6:
                raise ValueError(
                    'Expected 6 fields on line {} of kraken report, found {}'.format(
                        line_number, len(fields)
                    )
                )
            (percentage, seqs_total, seqs_this_level,
             taxonomic_level, ncbi_taxid, taxon_name) = fields
            kraken_report.append({
                'percentage': float(percentage),
                'seqs_total': int(seqs_total),
                'seqs_this_level': int(seqs_this_level),
                'taxonomic_level': taxonomic_level,
                'ncbi_taxid': ncbi_taxid,
                'taxon_name': taxon_name,
            })

    return kraken_report
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
40
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
41
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def _first_matching_count(kraken_report, taxon_name, count_key):
    """Return ``count_key`` of the first report line named ``taxon_name``, or 0 if absent."""
    for report_line in kraken_report:
        if report_line['taxon_name'] == taxon_name:
            return report_line[count_key]
    return 0


def _format_fraction(count, total):
    """Format ``count / total`` to six decimals; a zero ``total`` yields '0.000000'."""
    fraction = float(count) / float(total) if total else 0.0
    return '{:.6f}'.format(fraction)


def main(args):
    """Write Bracken abundances, re-scaled to include unclassified reads, to stdout.

    The total sequence count is the ``root`` line's ``seqs_total`` plus the
    ``unclassified`` line's ``seqs_this_level`` from the Kraken report; a
    missing line counts as zero.  Every Bracken row gets ``total_seqs`` and
    has both fraction columns recomputed against that total.  Output is
    tab-delimited with a header: the ``unclassified`` row first, then the
    Bracken rows in descending order of Bracken-assigned sequences.

    Args:
        args: Namespace with ``kraken_report`` and ``bracken_abundances``
            attributes, each a path to the corresponding input file.
    """
    kraken_report = parse_kraken_report(args.kraken_report)
    bracken_abundances = parse_bracken_abundances(args.bracken_abundances)

    kraken_report_unclassified_seqs = _first_matching_count(
        kraken_report, 'unclassified', 'seqs_this_level'
    )
    # A report with no classified reads has no 'root' line; treat it as zero
    # in the same way a missing 'unclassified' line is treated.
    kraken_report_classified_seqs = _first_matching_count(
        kraken_report, 'root', 'seqs_total'
    )
    total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs

    output_fieldnames = [
        'name',
        'taxonomy_id',
        'taxonomy_lvl',
        'kraken_assigned_seqs',
        'bracken_assigned_seqs',
        'total_seqs',
        'kraken_fraction_total_seqs',
        'bracken_fraction_total_seqs',
    ]

    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='excel-tab')
    writer.writeheader()

    for b in bracken_abundances:
        b['total_seqs'] = total_seqs
        b['kraken_fraction_total_seqs'] = _format_fraction(b['kraken_assigned_seqs'], total_seqs)
        b['bracken_fraction_total_seqs'] = _format_fraction(b['bracken_assigned_seqs'], total_seqs)

    unclassified_fraction = _format_fraction(kraken_report_unclassified_seqs, total_seqs)
    bracken_unclassified_entry = {
        'name': 'unclassified',
        'taxonomy_id': 0,
        'taxonomy_lvl': 'U',
        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
        'total_seqs': total_seqs,
        'kraken_fraction_total_seqs': unclassified_fraction,
        'bracken_fraction_total_seqs': unclassified_fraction,
    }

    # Every row shares the same denominator, so ordering by the integer count
    # gives the descending-fraction order without comparing formatted strings.
    bracken_abundances = sorted(
        bracken_abundances, key=lambda x: x['bracken_assigned_seqs'], reverse=True
    )
    for b in [bracken_unclassified_entry] + bracken_abundances:
        writer.writerow(b)
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
91
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
92
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: both input paths are mandatory, so argparse
    # reports a usage error instead of main() failing on a None path.
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--kraken-report', required=True,
                        help='Path to the Kraken report file')
    parser.add_argument('-a', '--bracken-abundances', required=True,
                        help='Path to the Bracken abundances file')
    args = parser.parse_args()
    main(args)