Mercurial > repos > petr-novak > dante
comparison fasta2database.py @ 10:d0431a839606 draft
Uploaded
| author | petr-novak |
|---|---|
| date | Wed, 14 Aug 2019 11:24:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 9:ed4d9ede9cb4 | 10:d0431a839606 |
|---|---|
#!/usr/bin/env python3
'''
Helper script to create DANTE database which can be used in second iteration

Usage:
    fasta2database.py INPUT_FASTA OUTPUT_DB_FASTA OUTPUT_CLASSIFICATION

Every header line of INPUT_FASTA must consist of exactly three fields
separated by single spaces:

    >NAME DOMAIN CLASSIFICATION

where CLASSIFICATION is a "|"-separated list of levels. The header is
rewritten to ">NA-DOMAIN__NAME" (with "-" in NAME replaced by "_") and the
sequence lines are copied unchanged. One tab-separated row per distinct
(NAME, CLASSIFICATION) pair is written to OUTPUT_CLASSIFICATION.
'''
import sys


def _reformat_header(line):
    '''
    Convert one FASTA header line to the database header and table row.

    line -- header line starting with ">", with or without trailing newline

    Returns a tuple (new_header, classification_row); both strings are
    terminated by a newline.
    Raises ValueError when the header does not have exactly three fields.
    '''
    # Strip the line terminator first so that a header on the very last line
    # of a file without a final newline produces the same row as any other.
    fields = line.rstrip("\n").split(" ")
    if len(fields) != 3:
        raise ValueError(
            "malformed FASTA header, expected '>name domain classification' "
            "separated by single spaces: {!r}".format(line)
        )
    name, domain, classification = fields
    # drop the leading ">" and replace "-" in the sequence name
    name_clean = name[1:].replace("-", "_")
    new_header = ">NA-{}__{}\n".format(domain, name_clean)
    classification_string = "\t".join(classification.split("|"))
    row = "{}\t{}\n".format(name_clean, classification_string)
    return new_header, row


def main(argv=None):
    '''
    Run the conversion.

    argv -- argument list in the layout of sys.argv (program name first);
            defaults to sys.argv. Arguments beyond the third are ignored.
    '''
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(
            "usage: {} INPUT_FASTA OUTPUT_DB_FASTA OUTPUT_CLASSIFICATION".format(
                argv[0] if argv else "fasta2database.py"
            )
        )
    fasta_input = argv[1]
    db_fasta_output_file = argv[2]
    db_classification_file = argv[3]

    # a set, so identical rows coming from repeated headers are written once
    classification_table = set()
    # fasta header will be reformatted to correct REXdb classification
    with open(fasta_input, 'r') as f, open(db_fasta_output_file, 'w') as out:
        for line in f:
            if line.startswith(">"):
                new_header, row = _reformat_header(line)
                classification_table.add(row)
                out.write(new_header)
            else:
                out.write(line)

    # Set iteration order of strings differs between interpreter runs;
    # sorting makes the classification file reproducible.
    with open(db_classification_file, 'w') as f:
        f.writelines(sorted(classification_table))


if __name__ == "__main__":
    main()
