diff format_metaphlan2_output.py @ 0:2bfa9b200600 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/format_metaphlan2_output/ commit 36388cac89e4ffa55fe317d10cbf97346ab00a71-dirty
author bebatut
date Wed, 20 Apr 2016 07:52:41 -0400
parents
children 1e74cb2c8e67
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/format_metaphlan2_output.py	Wed Apr 20 07:52:41 2016 -0400
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+import argparse
+import re
+
+taxo_level_correspondance = {}
+taxo_level_correspondance['k'] = 'kingdom'
+taxo_level_correspondance['p'] = 'phylum'
+taxo_level_correspondance['c'] = 'class'
+taxo_level_correspondance['o'] = 'order'
+taxo_level_correspondance['f'] = 'family'
+taxo_level_correspondance['g'] = 'genus'
+taxo_level_correspondance['s'] = 'species'
+taxo_level_correspondance['t'] = 'strains'
+
+def write_taxo_abundance(output_files, level, taxo, abundance):
+    if not taxo_level_correspondance.has_key(level):
+        raise ValueError(level + ' is not a know taxonomic level')
+    output_files[taxo_level_correspondance[level]].write(taxo + '\t')
+    output_files[taxo_level_correspondance[level]].write(abundance + '\n')
+
+def format_metaphlan2_output(args):
+    taxo_levels_abundance_files = {}
+    taxo_levels_abundance_files['kingdom'] = open(args.kingdom_abundance_file, 'w')
+    taxo_levels_abundance_files['phylum'] = open(args.phylum_abundance_file, 'w')
+    taxo_levels_abundance_files['class'] = open(args.class_abundance_file, 'w')
+    taxo_levels_abundance_files['order'] = open(args.order_abundance_file, 'w')
+    taxo_levels_abundance_files['family'] = open(args.family_abundance_file, 'w')
+    taxo_levels_abundance_files['genus'] = open(args.genus_abundance_file, 'w')
+    taxo_levels_abundance_files['species'] = open(args.species_abundance_file, 'w')
+    taxo_levels_abundance_files['strains'] = open(args.strains_abundance_file, 'w')
+
+    for taxo_level_file in taxo_levels_abundance_files:
+        taxo_levels_abundance_files[taxo_level_file].write(taxo_level_file + '\t')
+        taxo_levels_abundance_files[taxo_level_file].write('abundance\n')
+
+    with open(args.metaphlan2_output, 'r') as input_file:
+        with open(args.all_taxo_level_abundance_file, 'w') as output_file:
+            output_file.write("kingdom\t")
+            output_file.write("phylum\t")
+            output_file.write("class\t")
+            output_file.write("order\t")
+            output_file.write("family\t")
+            output_file.write("genus\t")
+            output_file.write("species\t")
+            output_file.write("strains\t")
+            output_file.write("abundance\n")
+            levels_number = 8
+
+            for line in input_file.readlines():
+                if line.startswith("#"):
+                    continue
+
+                split_line = line[:-1].split('\t')
+                all_taxo = split_line[0]
+                abundance = split_line[1]
+
+                split_taxo = all_taxo.split('|')
+                for level in split_taxo:
+                    taxo = level.split('__')[1]
+                    taxo = taxo.replace("_"," ")
+                    output_file.write(taxo + '\t')
+
+                for i in range(len(split_taxo), levels_number):
+                    output_file.write('\t')
+
+                output_file.write(abundance + "\n")
+
+
+                last_taxo_level = split_taxo[-1].split('__')
+                taxo = last_taxo_level[1].replace("_"," ")
+                level = last_taxo_level[0]
+                write_taxo_abundance(taxo_levels_abundance_files, level, taxo, 
+                    abundance)
+
+    for taxo_level_file in taxo_levels_abundance_files:
+        taxo_levels_abundance_files[taxo_level_file].close()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--metaphlan2_output', required=True)
+    parser.add_argument('--all_taxo_level_abundance_file', required=True)
+    parser.add_argument('--kingdom_abundance_file', required=True)
+    parser.add_argument('--phylum_abundance_file', required=True)
+    parser.add_argument('--class_abundance_file', required=True)
+    parser.add_argument('--order_abundance_file', required=True)
+    parser.add_argument('--family_abundance_file', required=True)
+    parser.add_argument('--genus_abundance_file', required=True)
+    parser.add_argument('--species_abundance_file', required=True)
+    parser.add_argument('--strains_abundance_file', required=True)
+    
+    args = parser.parse_args()
+
+    format_metaphlan2_output(args)
\ No newline at end of file