Mercurial > repos > jay > pdaug_basic_plots
diff PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py @ 0:7d247e27ff11 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 01:52:18 +0000 |
parents | |
children |
line wrap: on
line diff
# Changeset diff header of the page this script was taken from:
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py Wed Oct 28 01:52:18 2020 +0000
# @@ -0,0 +1,43 @@
"""Count n-grams in peptide sequences and write them as a tab-separated table.

Command line:
    -I / --InFile   FASTA-style input file (one sequence per line).
    -O / --OutFile  path of the tab-separated output table.
    -N / --Ngrams   n-gram length passed to modlamp's ``count_ngrams``.

Each peptide becomes one output row and each n-gram reported by modlamp
becomes one column; n-grams not reported for a peptide are written as 0.
"""
import argparse
import os  # noqa: F401  (unused here; retained from the original import list)

import pandas as pd


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The parsed namespace with ``InFile``, ``OutFile`` and ``Ngrams``
        (``Ngrams`` already converted to ``int``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-I", "--InFile", required=True, default=None, help="Input file")
    parser.add_argument("-O", "--OutFile", required=True, default=None, help="Output file")
    # type=int makes a non-numeric value a clean usage error instead of a
    # ValueError traceback later in the run.
    parser.add_argument("-N", "--Ngrams", required=True, type=int, default=None, help="ngrams")
    return parser.parse_args(argv)


def read_peptides(lines):
    """Extract peptide sequences from the lines of a FASTA-style file.

    Every line containing ``'>'`` is treated as a header and skipped. Every
    other non-blank line is taken as one complete peptide, so sequences
    wrapped over several lines are NOT joined.

    Args:
        lines: Iterable of text lines (for example an open file).

    Returns:
        List of sequences with surrounding whitespace and line endings removed.
    """
    peptides = []
    for line in lines:
        if '>' in line:
            continue
        sequence = line.strip()
        # Blank lines (typically a trailing newline at end of file) are not
        # peptides; keeping them would add a spurious row to the table.
        if sequence:
            peptides.append(sequence)
    return peptides


def ngram_table(peptides, ngram_size):
    """Build the n-gram table, one row per peptide.

    Args:
        peptides: Sequence of peptide strings.
        ngram_size: n-gram length handed to ``PeptideDescriptor.count_ngrams``.

    Returns:
        DataFrame indexed ``sequence0``, ``sequence1``, ... with missing
        values replaced by 0; an empty DataFrame when there are no peptides.
    """
    # Imported here rather than at module level so that argument parsing and
    # the FASTA helper remain usable where modlamp is not importable.
    from modlamp.core import BaseDescriptor  # noqa: F401  (retained from the original import list)
    from modlamp.descriptors import PeptideDescriptor

    rows = []
    for position, peptide in enumerate(peptides):
        descriptor = PeptideDescriptor(peptide)
        descriptor.count_ngrams([ngram_size])
        # NOTE(review): descriptor.descriptor is presumably a mapping of
        # n-gram -> count after count_ngrams; confirm against modlamp docs.
        rows.append(
            pd.DataFrame(descriptor.descriptor, index=["sequence" + str(position)])
        )

    if not rows:
        # pd.concat raises on an empty list; keep the "no peptides" case
        # producing an empty table.
        return pd.DataFrame()

    # One concat over all rows instead of growing the frame inside the loop,
    # which re-copies the accumulated table on every iteration.
    return pd.concat(rows, axis=0).fillna(0)


def main(argv=None):
    """Run the tool: read peptides, count n-grams, write the table."""
    args = parse_args(argv)

    # The context manager closes the input file even if parsing fails.
    with open(args.InFile) as handle:
        peptides = read_peptides(handle)

    table = ngram_table(peptides, args.Ngrams)
    table.to_csv(args.OutFile, sep='\t', index=False)


if __name__ == "__main__":
    main()