Mercurial > repos > drosofff > msp_fasta_tabular_converter
view fasta_tabular_converter.py @ 1:2f7278120be9 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_fasta_tabular_converter commit 6a93f2809e2939f9d847c3238bfbff8ead367d9f
author | drosofff |
---|---|
date | Tue, 22 Mar 2016 18:54:00 -0400 |
parents | 951cb6b3979b |
children | 330dd8a8c31a |
line wrap: on
line source
#!/usr/bin/python
#
"""Convert read files between fasta, tabular and weighted-fasta formats.

Formats handled by this script:

* fasta: ``>header`` lines followed by one or more sequence lines.
* tabular: one ``sequence<TAB>count`` pair per line.
* weighted fasta: one-line-per-sequence fasta whose headers end in
  ``_<count>`` (e.g. ``>12_7``), the count being the number of reads.

The code runs unchanged on Python 2.7 and Python 3.
"""
import sys
import string
import argparse
from collections import defaultdict


def Parser():
    """Parse the command line and return the resulting argparse namespace.

    Recognised options are ``--input``, ``--output`` and ``--type``.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--input', action="store", type=str, help="input file")
    the_parser.add_argument(
        '--output', action="store", type=str, help="output converted file")
    the_parser.add_argument(
        '--type', action="store", type=str, help="type of convertion")
    args = the_parser.parse_args()
    return args


def _strip_eol(line):
    """Return *line* without its trailing newline / carriage return.

    Unlike ``line[:-1]`` this does not eat the last character of a final
    line that has no terminating newline.
    """
    return line.rstrip("\r\n")


def _header_weight(header):
    """Return the integer after the last ``_`` of a ``>..._N`` header."""
    return int(header[1:].split("_")[-1])


def _by_decreasing_count(counts):
    """Return the keys of *counts* sorted by decreasing value."""
    return sorted(counts, key=counts.get, reverse=True)


def readfasta_writetabular(fasta, tabular, mode="oneline"):
    """Collapse a fasta file into a ``sequence<TAB>count`` table.

    The sequence lines of each record are concatenated, identical sequences
    are counted, and the table is written by decreasing count.

    Args:
        fasta: path of the fasta file to read.
        tabular: path of the tabular file to write.
        mode: unused; kept so existing callers keep working.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    counts = defaultdict(int)
    chunks = None  # None until the first '>' header has been seen
    with open(fasta, "r") as handle:
        for line in handle:
            if line.startswith(">"):
                if chunks is not None:
                    # A new header closes the previous record.
                    counts["".join(chunks)] += 1
                chunks = []
                continue
            text = _strip_eol(line)
            if chunks is None:
                if text:
                    raise ValueError(
                        "sequence line found before any '>' header in %s"
                        % fasta)
                continue
            chunks.append(text)
    if chunks is not None:
        # The last record has no following header to close it.
        counts["".join(chunks)] += 1
    with open(tabular, "w") as out:
        for seq in _by_decreasing_count(counts):
            out.write("%s\t%s\n" % (seq, counts[seq]))


def readtabular_writefasta(tabular, fasta):
    """Expand a ``sequence count`` table into a fasta file.

    Each sequence is written ``count`` times; headers are a running
    integer starting at 1. Blank lines in the input are skipped.

    Args:
        tabular: path of the tabular file to read.
        fasta: path of the fasta file to write.
    """
    counter = 0
    with open(tabular, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            for _ in range(int(fields[1])):
                counter += 1
                out.write(">%s\n%s\n" % (counter, fields[0]))


def readtabular_writefastaweighted(tabular, fasta):
    """Convert a ``sequence count`` table into a weighted fasta file.

    Each table row becomes one record whose header is ``>rank_count``,
    rank being the 1-based position of the row among non-blank rows.

    Args:
        tabular: path of the tabular file to read.
        fasta: path of the weighted fasta file to write.
    """
    counter = 0
    with open(tabular, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            counter += 1
            out.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))


def readfastaeighted_writefastaweighted(fastaweigthed_input,
                                        fastaweigthed_reparsed):
    """Re-collapse a weighted fasta file.

    Weights of identical sequences are summed and the records are written
    by decreasing weight with fresh ``>rank_weight`` headers. The sum of
    all header weights is reported on standard output.

    Args:
        fastaweigthed_input: path of the weighted fasta file to read.
        fastaweigthed_reparsed: path of the weighted fasta file to write.

    Raises:
        ValueError: if a sequence line appears before the first header.
    """
    counts = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as handle:
        for line in handle:
            text = _strip_eol(line)
            if not text:
                continue
            if text.startswith(">"):
                weight = _header_weight(text)
                number_reads += weight
            elif weight is None:
                raise ValueError(
                    "sequence line found before any '>' header in %s"
                    % fastaweigthed_input)
            else:
                counts[text] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        for rank, seq in enumerate(_by_decreasing_count(counts), 1):
            out.write(">%s_%s\n%s\n" % (rank, counts[seq], seq))
    print("%s reads collapsed" % number_reads)


def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted fasta file into a plain fasta file.

    Each sequence is written as many times as the weight in its header;
    output headers are a running integer starting at 1.

    Args:
        fastaweigthed: path of the weighted fasta file to read.
        fasta: path of the fasta file to write.

    Raises:
        ValueError: if a sequence line appears before the first header.
    """
    counter = 0
    weight = None
    with open(fastaweigthed, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            text = _strip_eol(line)
            if not text:
                continue
            if text.startswith(">"):
                weight = _header_weight(text)
            elif weight is None:
                raise ValueError(
                    "sequence line found before any '>' header in %s"
                    % fastaweigthed)
            else:
                for _ in range(weight):
                    counter += 1
                    out.write(">%s\n%s\n" % (counter, text))


def main(input, output, type):
    """Run the conversion named by *type* from *input* to *output*.

    Args:
        input: path of the file to read.
        output: path of the file to write.
        type: one of ``fasta2tabular``, ``tabular2fasta``,
            ``tabular2fastaweight``, ``fastaweight2fastaweight`` or
            ``fastaweight2fasta``.

    Raises:
        ValueError: if *type* is not one of the names above.
    """
    converters = {
        "fasta2tabular": readfasta_writetabular,
        "tabular2fasta": readtabular_writefasta,
        "tabular2fastaweight": readtabular_writefastaweighted,
        "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
        "fastaweight2fasta": readfastaeighted_writefasta,
    }
    convert = converters.get(type)
    if convert is None:
        # Previously an unknown type silently produced no output at all.
        raise ValueError(
            "unknown conversion type %r (expected one of: %s)"
            % (type, ", ".join(sorted(converters))))
    convert(input, output)


if __name__ == "__main__":
    args = Parser()
    main(args.input, args.output, args.type)