comparison adjust_bracken_for_unclassified_reads.py @ 2:87459bd1615a draft

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 4ebe8216c423e2d66be92247f273df21cb5852f1
author public-health-bioinformatics
date Thu, 27 Oct 2022 00:37:27 +0000
parents 3ab9d37e547e
children
comparison
equal deleted inserted replaced
1:3cde438eb222 2:87459bd1615a
41 41
42 def main(args): 42 def main(args):
43 kraken_report = parse_kraken_report(args.kraken_report) 43 kraken_report = parse_kraken_report(args.kraken_report)
44 bracken_abundances = parse_bracken_abundances(args.bracken_abundances) 44 bracken_abundances = parse_bracken_abundances(args.bracken_abundances)
45 45
46 kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level'] 46 try:
47 kraken_report_unclassified_seqs = list(filter(lambda x: x['taxon_name'] == 'unclassified', kraken_report))[0]['seqs_this_level']
48 except IndexError as e:
49 kraken_report_unclassified_seqs = 0
47 kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total'] 50 kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total']
48 51
49 total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs 52 total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs
50 percent_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) 53 fraction_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs)
51
52 bracken_unclassified_entry = {
53 'name': 'unclassified',
54 'taxonomy_id': 0,
55 'taxonomy_lvl': 'U',
56 'kraken_assigned_seqs': kraken_report_unclassified_seqs,
57 'bracken_assigned_seqs': kraken_report_unclassified_seqs,
58 'kraken_fraction_total_seqs': percent_unclassified,
59 'bracken_fraction_total_seqs': 0.0,
60 }
61
62 bracken_abundances = [bracken_unclassified_entry] + bracken_abundances
63 54
64 output_fieldnames = [ 55 output_fieldnames = [
65 'name', 56 'name',
66 'taxonomy_id', 57 'taxonomy_id',
67 'taxonomy_lvl', 58 'taxonomy_lvl',
80 kraken_adjusted_fraction_total_seqs = float(b['kraken_assigned_seqs']) / float(total_seqs) 71 kraken_adjusted_fraction_total_seqs = float(b['kraken_assigned_seqs']) / float(total_seqs)
81 b['kraken_fraction_total_seqs'] = '{:.6f}'.format(kraken_adjusted_fraction_total_seqs) 72 b['kraken_fraction_total_seqs'] = '{:.6f}'.format(kraken_adjusted_fraction_total_seqs)
82 bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs) 73 bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs)
83 b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs) 74 b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs)
84 75
85 for b in sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True): 76 bracken_unclassified_entry = {
77 'name': 'unclassified',
78 'taxonomy_id': 0,
79 'taxonomy_lvl': 'U',
80 'kraken_assigned_seqs': kraken_report_unclassified_seqs,
81 'bracken_assigned_seqs': kraken_report_unclassified_seqs,
82 'total_seqs': total_seqs,
83 'kraken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified),
84 'bracken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified),
85 }
86
87 bracken_abundances = sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True)
88 bracken_abundances = [bracken_unclassified_entry] + bracken_abundances
89 for b in bracken_abundances:
86 writer.writerow(b) 90 writer.writerow(b)
87 91
88 92
89 if __name__ == '__main__': 93 if __name__ == '__main__':
90 parser = argparse.ArgumentParser() 94 parser = argparse.ArgumentParser()