Mercurial > repos > devteam > fasta_to_tabular
view fasta_to_tabular.py @ 2:091edad7622f draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 07:25:01 -0500 |
parents | 9d189d08f2ad |
children |
line wrap: on
line source
#!/usr/bin/env python
# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
"""
Convert a FASTA file to tabular format.

Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
Output: tabular format
convert: fasta to tabular

Each '>' title line starts a new output row holding the title column(s),
a tab, and then all following sequence lines joined together. Blank lines
and lines starting with '#' are skipped.

Usage: fasta_to_tabular.py <fasta> <tabular> <truncation> <columns>
    truncation  number of identifier characters to keep (0 keeps everything)
    columns     number of columns to split the title into (1 or more)
"""

import sys


def stop_err(msg):
    """Abort by raising SystemExit with ``msg`` via ``sys.exit``."""
    sys.exit(msg)


def _title_columns(title_line, keep_first, descr_split):
    """Return the tab-joined title column(s) for one '>' line (no newline)."""
    # Don't want any existing tabs to trigger extra columns:
    title_line = title_line.replace('\t', ' ')
    if descr_split == 1:
        # keep_first was already increased by one by the caller to account
        # for the ">" character skipped here.
        return title_line[1:keep_first]
    words = title_line[1:].split(None, descr_split - 1)
    if words:
        # apply any truncation to first word (the id)
        words[0] = words[0][:keep_first]
    # pad with empty columns if required; a bare ">" title has no words at
    # all and therefore becomes descr_split empty columns
    words += [""] * (descr_split - len(words))
    return "\t".join(words)


def _convert(in_fh, out, keep_first, descr_split):
    """Write the tabular form of the FASTA lines from ``in_fh`` to ``out``."""
    # Track whether anything has been emitted instead of relying on the
    # physical line number: leading comment/blank lines must not produce an
    # empty first row, and empty input must produce empty output.
    wrote_any = False
    for line in in_fh:
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        if line.startswith('>'):
            if wrote_any:
                # terminate the previous row
                out.write('\n')
            out.write(_title_columns(line, keep_first, descr_split))
            out.write('\t')
        else:
            # sequence lines are concatenated onto the current row
            out.write(line)
        wrote_any = True
    if wrote_any:
        out.write('\n')


def __main__():
    """Command-line entry point: read the four arguments and convert.

    Exits through ``stop_err`` when the argument count is wrong, when the
    truncation/columns values are not integers, or when columns is below 1.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    try:
        keep_first = int(sys.argv[3])
        descr_split = int(sys.argv[4])
    except ValueError:
        stop_err("Truncation and columns must both be integers")
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        # zero means "no truncation": None slices through to the end
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    with open(outfile, 'w') as out, open(infile) as in_fh:
        _convert(in_fh, out, keep_first, descr_split)


if __name__ == "__main__":
    __main__()