Mercurial > repos > bebatut > combine_metaphlan2_humann2
comparison combine_metaphlan2_humann2.py @ 1:e25efca0a49c draft
"planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/combine_metaphlan2_humann2 commit 4a40a73a6eb981310a3c1c54dfa98b5460a843f9"
author | bebatut |
---|---|
date | Mon, 14 Sep 2020 12:19:49 +0000 |
parents | 31394a0c0242 |
children | fdfb35745104 |
comparison
equal
deleted
inserted
replaced
0:31394a0c0242 | 1:e25efca0a49c |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # -*- coding: utf-8 -*- | 2 # -*- coding: utf-8 -*- |
3 | 3 |
4 import sys | |
5 import os | |
6 import argparse | 4 import argparse |
7 import re | |
8 | 5 |
9 def extract_clade_abundance(metaphlan2_filepath): | 6 |
10 clade_abundance = {} | 7 def extract_clade_abundance(metaphlan2_fp): |
11 with open(metaphlan2_filepath, 'r') as metaphlan2_file: | 8 clade_abund = {} |
12 for line in metaphlan2_file.readlines(): | 9 with open(metaphlan2_fp, 'r') as metaphlan2_f: |
10 for line in metaphlan2_f.readlines(): | |
13 if line.find('g__') == -1: | 11 if line.find('g__') == -1: |
14 continue | 12 continue |
15 | 13 |
16 split_line = line[:-1].split('\t') | 14 split_line = line[:-1].split('\t') |
17 taxo = split_line[0] | 15 taxo = split_line[0] |
18 abundance = split_line[1] | 16 abundance = split_line[1] |
19 | 17 |
20 genus = taxo[(taxo.find('g__')+3):] | 18 genus = taxo[(taxo.find('g__')+3):] |
21 if genus.find('|') != -1: | 19 if genus.find('|') != -1: |
22 genus = genus[:(genus.find('|'))] | 20 genus = genus[:(genus.find('|'))] |
23 clade_abundance.setdefault(genus, {'abundance':0, 'species':{}}) | 21 clade_abund.setdefault(genus, {'abundance': 0, 'species': {}}) |
24 if taxo.find('t__') != -1: | 22 if taxo.find('t__') != -1: |
25 continue | 23 continue |
26 elif taxo.find('s__') != -1: | 24 elif taxo.find('s__') != -1: |
27 species = taxo[(taxo.find('s__')+3):] | 25 species = taxo[(taxo.find('s__')+3):] |
28 clade_abundance[genus]['species'].setdefault(species, abundance) | 26 clade_abund[genus]['species'].setdefault( |
27 species, | |
28 abundance) | |
29 else: | 29 else: |
30 clade_abundance[genus]['abundance'] = abundance | 30 clade_abund[genus]['abundance'] = abundance |
31 return clade_abundance | 31 return clade_abund |
32 | 32 |
33 def compute_overall_abundance(humann2_file): | 33 |
34 def compute_overall_abundance(humann2_fp): | |
34 overall_abundance = 0 | 35 overall_abundance = 0 |
35 with open(args.humann2_file, 'r') as humann2_file: | 36 with open(humann2_fp, 'r') as humann2_f: |
36 for line in humann2_file.readlines(): | 37 for line in humann2_f.readlines(): |
37 if line.find('|') != -1 or line.startswith('#'): | 38 if line.find('|') != -1 or line.startswith('#'): |
38 continue | 39 continue |
39 split_line = line[:-1].split('\t') | 40 split_line = line[:-1].split('\t') |
40 overall_abundance += float(split_line[1]) | 41 overall_abundance += float(split_line[1]) |
41 return overall_abundance | 42 return overall_abundance |
42 | 43 |
44 | |
43 def format_characteristic_name(name): | 45 def format_characteristic_name(name): |
44 formatted_name = name | 46 formatted_n = name |
45 formatted_name = formatted_name.replace('/',' ') | 47 formatted_n = formatted_n.replace('/', ' ') |
46 formatted_name = formatted_name.replace('-',' ') | 48 formatted_n = formatted_n.replace('-', ' ') |
47 formatted_name = formatted_name.replace("'",'') | 49 formatted_n = formatted_n.replace("'", '') |
48 if formatted_name.find('(') != -1 and formatted_name.find(')') != -1: | 50 if formatted_n.find('(') != -1 and formatted_n.find(')') != -1: |
49 open_bracket = formatted_name.find('(') | 51 open_bracket = formatted_n.find('(') |
50 close_bracket = formatted_name.find(')')+1 | 52 close_bracket = formatted_n.find(')')+1 |
51 formatted_name = formatted_name[:open_bracket] + formatted_name[close_bracket:] | 53 formatted_n = formatted_n[:open_bracket] + formatted_n[close_bracket:] |
52 return formatted_name | 54 return formatted_n |
55 | |
53 | 56 |
54 def combine_metaphlan2_humann2(args): | 57 def combine_metaphlan2_humann2(args): |
55 clade_abundance = extract_clade_abundance(args.metaphlan2_file) | 58 clade_abund = extract_clade_abundance(args.metaphlan2_fp) |
56 overall_abundance = compute_overall_abundance(args.humann2_file) | 59 overall_abund = compute_overall_abundance(args.humann2_fp) |
57 | 60 |
58 with open(args.output_file, 'w') as output_file: | 61 with open(args.output_fp, 'w') as output_f: |
59 output_file.write('genus\t') | 62 s = 'genus\tgenus_abundance\tspecies\tspecies_abundance\t' |
60 output_file.write('genus_abundance\t') | 63 s = '%s\t%s_id\t%s_name\t%s_abundance\n' % (s, args.type, args.type, args.type) |
61 output_file.write('species\t') | 64 output_f.write(s) |
62 output_file.write('species_abundance\t') | 65 with open(args.humann2_fp, 'r') as humann2_f: |
63 output_file.write(args.type + '_id\t') | 66 for line in humann2_f.readlines(): |
64 output_file.write(args.type + '_name\t') | |
65 output_file.write(args.type + '_abundance\n') | |
66 with open(args.humann2_file, 'r') as humann2_file: | |
67 for line in humann2_file.readlines(): | |
68 if line.find('|') == -1: | 67 if line.find('|') == -1: |
69 continue | 68 continue |
70 | 69 |
71 split_line = line[:-1].split('\t') | 70 split_line = line[:-1].split('\t') |
72 abundance = 100*float(split_line[1])/overall_abundance | 71 abundance = 100*float(split_line[1])/overall_abund |
73 annotation = split_line[0].split('|') | 72 annotation = split_line[0].split('|') |
74 characteristic = annotation[0].split(':') | 73 charact = annotation[0].split(':') |
75 characteristic_id = characteristic[0] | 74 charact_id = charact[0] |
76 characteristic_name = '' | 75 char_name = '' |
77 if len(characteristic) > 1: | 76 if len(charact) > 1: |
78 characteristic_name = format_characteristic_name(characteristic[-1]) | 77 char_name = format_characteristic_name(charact[-1]) |
79 taxo = annotation[1].split('.') | 78 taxo = annotation[1].split('.') |
80 | 79 |
81 if taxo[0] == 'unclassified': | 80 if taxo[0] == 'unclassified': |
82 continue | 81 continue |
83 genus = taxo[0][3:] | 82 genus = taxo[0][3:] |
84 species = taxo[1][3:] | 83 species = taxo[1][3:] |
85 | 84 |
86 if not clade_abundance.has_key(genus): | 85 if genus not in clade_abund: |
87 print "no", genus, "found in", args.metaphlan2_file | 86 print("no %s found in %s" % (genus, args.metaphlan2_fp)) |
88 continue | 87 continue |
89 if not clade_abundance[genus]['species'].has_key(species): | 88 if species not in clade_abund[genus]['species']: |
90 print "no", species, "found in", args.metaphlan2_file, | 89 print("no %s found in %s for % s" % (species, args.metaphlan2_fp, genus)) |
91 print "for", genus | |
92 continue | 90 continue |
93 output_file.write(genus + '\t') | 91 |
94 output_file.write(clade_abundance[genus]['abundance'] + '\t') | 92 s = "%s\t%s\t" % (genus, clade_abund[genus]['abundance']) |
95 output_file.write(species + '\t') | 93 s += "%s\t%s\t" % (species, clade_abund[genus]['species'][species]) |
96 output_file.write(clade_abundance[genus]['species'][species] + '\t') | 94 s += "%s\t%s\t%s\n" % (charact_id, char_name, abundance) |
97 output_file.write(characteristic_id + '\t') | 95 output_f.write(s) |
98 output_file.write(characteristic_name + '\t') | 96 |
99 output_file.write(str(abundance) + '\n') | |
100 | 97 |
101 if __name__ == '__main__': | 98 if __name__ == '__main__': |
102 parser = argparse.ArgumentParser() | 99 parser = argparse.ArgumentParser() |
103 parser.add_argument('--humann2_file', required=True) | 100 parser.add_argument('--humann2_fp', required=True) |
104 parser.add_argument('--metaphlan2_file', required=True) | 101 parser.add_argument('--metaphlan2_fp', required=True) |
105 parser.add_argument('--output_file', required=True) | 102 parser.add_argument('--output_fp', required=True) |
106 parser.add_argument('--type', required=True, | 103 parser.add_argument( |
107 choices = ['gene_families','pathways']) | 104 '--type', |
105 required=True, | |
106 choices=['gene_families', 'pathways']) | |
108 args = parser.parse_args() | 107 args = parser.parse_args() |
109 | 108 |
110 combine_metaphlan2_humann2(args) | 109 combine_metaphlan2_humann2(args) |