Mercurial > repos > jay > pdaug_sequence_property_based_descriptors
view PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 9:15db59617371 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit d396d7ff89705cc0dd626ed32c45a9f4029b1b05"
author | jay |
---|---|
date | Mon, 10 Jan 2022 04:51:54 +0000 |
parents | 0a31e26e68d7 |
children |
line wrap: on
line source
import pandas as pd def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps): fn = [firstdatafile, seconddatafile] df = pd.read_csv(infile, sep="\t") if clmpepid == None: pass else: names = df[clmpepid].tolist() peps = df[peps].tolist() if method == "withoutlabel": f = open(outfile,'w') if clmpepid is not None: for i,n in enumerate(peps): f.write(">"+names[i]+'\n') f.write(n+'\n') f.close() else: for i,n in enumerate(peps): f.write(">"+str(i)+'\n') f.write(n+'\n') f.close() elif method == "withlabel": labels = df[slcclasslabel].tolist() label = list(set(labels)) if clmpepid is None: for i, l in enumerate(label): f = open(fn[i],'w') print('ok1') for i, L in enumerate(labels): if l == L: f.write(">"+str(i)+"_"+str(l)+'\n') f.write(peps[i]+'\n') f.close() else: for i, l in enumerate(label): f = open(fn[i],'w') for i, L in enumerate(labels): if l == L: f.write(">"+names[i]+"_"+l+'\n') f.write(peps[i]+'\n') f.close() if __name__=="__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") parser.add_argument("-C", "--ClmPepID", required=False, default=None, help="Peptide Column Name") parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label", help="Class Label Column Name") parser.add_argument("-P", "--PeptideColumn", required=True, default=None, help="Class Label Column Name") args = parser.parse_args() TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile, args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn)