| 
10
 | 
     1 #!/usr/bin/env python3
 | 
| 
 | 
     2 '''
 | 
| 
 | 
     3 Helper script to create a DANTE database which can be used in the second iteration
 | 
| 
 | 
     4 '''
 | 
| 
 | 
     5 import sys
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 fasta_input = sys.argv[1]
 | 
| 
 | 
     8 db_fasta_output_file = sys.argv[2]
 | 
| 
 | 
     9 db_classification_file = sys.argv[3]
 | 
| 
 | 
    10 classification_table = set()
 | 
| 
 | 
    11 # fasta header will be reformatted to correct REXdb classification
 | 
| 
 | 
    12 with open(fasta_input, 'r') as f, open(db_fasta_output_file, 'w') as out:
 | 
| 
 | 
    13     for line in f:
 | 
| 
 | 
    14         if line[0] == ">":
 | 
| 
 | 
    15             ## modify header
 | 
| 
 | 
    16             name, domain, classification = line.split(" ")
 | 
| 
 | 
    17             name_clean=name[1:].replace("-","_")
 | 
| 
 | 
    18             new_header = ">NA-{}__{}\n".format(domain, name_clean)
 | 
| 
 | 
    19             classification_string = "\t".join(classification.split("|"))
 | 
| 
 | 
    20             classification_table.add("{}\t{}".format(name_clean, classification_string))
 | 
| 
 | 
    21             out.write(new_header)
 | 
| 
 | 
    22         else:
 | 
| 
 | 
    23             out.write(line)
 | 
| 
 | 
    24 with open(db_classification_file, 'w') as f:
 | 
| 
 | 
    25     f.writelines(classification_table)
 |