view fasta_to_tabular.py @ 2:091edad7622f draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:25:01 -0500
parents 9d189d08f2ad
children
line wrap: on
line source

#!/usr/bin/env python
# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
"""
Convert a FASTA file to tabular format.

Input: fasta (input file), tabular (output file), int (truncation of id; 0 keeps the full id), int (number of columns to take from the description line; 1 or more)
Output: tabular
"""

import sys


def stop_err(msg):
    """Abort the program with *msg* as the exit reason.

    Raises SystemExit carrying ``msg``; when this is not caught and ``msg``
    is a string, the interpreter prints it to stderr and exits with
    status 1.
    """
    raise SystemExit(msg)


def _format_header(line, keep_first, descr_split):
    """Return the tab-separated leading column(s) for one FASTA header line.

    ``line`` is the header including its leading ">", with line endings
    already stripped. ``keep_first`` is the slice end used to truncate the
    identifier (``None`` keeps all of it); when ``descr_split`` is 1 the
    caller has already added one to it to account for the ">" character.
    ``descr_split`` is the number of columns to produce from the header.
    """
    # Don't want any existing tabs to trigger extra columns:
    line = line.replace('\t', ' ')
    if descr_split == 1:
        return line[1:keep_first]
    words = line[1:].split(None, descr_split - 1)
    if not words:
        # A bare ">" yields no words; use an empty identifier instead of
        # failing with IndexError on words[0].
        words = [""]
    # apply any truncation to first word (the id)
    words[0] = words[0][0:keep_first]
    # pad with empty columns if required
    words += [""] * (descr_split - len(words))
    return "\t".join(words)


def __main__():
    """Convert the FASTA file named on the command line to tabular format.

    Reads four arguments from ``sys.argv``: input FASTA path, output
    tabular path, identifier truncation length (0 disables truncation) and
    the number of columns to split the header line into (1 or more).

    Each record becomes one output line: the header column(s), a tab, then
    the record's sequence lines concatenated. Blank lines and lines
    starting with "#" are skipped. An input with no records or sequence
    data produces an empty output file.

    Exits via ``stop_err`` on a wrong argument count or a column count
    below 1; raises ValueError if either numeric argument is not an
    integer.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    # True once something has been written to the current output row.
    # Using this instead of the input line number keeps leading comments
    # or blank lines from emitting a blank first row, makes the final
    # newline reliable for one-line inputs, and copes with empty files.
    row_open = False
    with open(outfile, 'w') as out, open(infile) as in_fh:
        for line in in_fh:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            if line.startswith('>'):
                if row_open:
                    # Terminate the previous record's row.
                    out.write('\n')
                out.write(_format_header(line, keep_first, descr_split))
                out.write('\t')
            else:
                out.write(line)
            row_open = True
        if row_open:
            out.write('\n')


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()