comparison PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py @ 0:7557b48b2872 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 02:10:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7557b48b2872
1 from modlamp.core import BaseDescriptor
2 from modlamp.descriptors import PeptideDescriptor
3 import pandas as pd
4 import argparse, os
5
# Command-line interface of the script: an input file, an output file and
# the n-gram size. All three options are mandatory, so no defaults are
# declared for them.
parser = argparse.ArgumentParser()

parser.add_argument("-I", "--InFile", required=True, help="Input file")
parser.add_argument("-O", "--OutFile", required=True, help="Output file")
# type=int lets argparse reject a non-numeric value up front with a usage
# message instead of the script failing later during conversion.
parser.add_argument("-N", "--Ngrams", required=True, type=int, help="ngrams")

# Parsed options; read by the rest of the script as args.InFile,
# args.OutFile and args.Ngrams.
args = parser.parse_args()
13
# Read the input file line by line and sort the lines into two lists:
#   Index - every line that contains '>' (record headers)
#   Pep   - every other non-empty line, taken as one peptide sequence each
Index = []
Pep = []

# The context manager guarantees the handle is closed, even if reading fails.
with open(args.InFile) as handle:
    for raw_line in handle:
        # Drop the line terminator (LF or CRLF) before classifying the line.
        line = raw_line.rstrip('\r\n')
        if '>' in line:
            Index.append(line)
        elif line:
            # Blank lines carry no sequence; skipping them avoids emitting
            # an empty peptide (and thus an all-zero row) downstream.
            Pep.append(line)
28
# Build one single-row table per peptide from its n-gram descriptor, stack
# the rows, and write the result as a tab-separated file.

# Convert once, outside the loop; int() is a no-op if the value is already
# an integer.
ngram_size = int(args.Ngrams)

frames = []
for i, seq in enumerate(Pep):
    D = PeptideDescriptor(seq)
    D.count_ngrams([ngram_size])
    # One row per peptide, labelled "sequence<i>"; the columns come from
    # whatever D.descriptor holds after count_ngrams.
    frames.append(pd.DataFrame(D.descriptor, index=["sequence" + str(i)]))

# Concatenate once instead of growing the frame inside the loop, which
# re-copies all previous rows on every iteration. pd.concat raises on an
# empty list, so fall back to an empty frame when there are no peptides.
df = pd.concat(frames, axis=0) if frames else pd.DataFrame()

# Columns missing for a given peptide show up as NaN after stacking;
# store them as 0.
df = df.fillna(0)
# The row labels are not written to the output file.
df.to_csv(args.OutFile, sep='\t', index=False)
41
42
43