annotate PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py @ 1:69a1ba3d6d29 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8"
author jay
date Tue, 29 Dec 2020 04:54:02 +0000
parents 1199b572b86c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
1 from modlamp.core import BaseDescriptor
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
2 from modlamp.descriptors import PeptideDescriptor
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
3 import pandas as pd
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
4 import argparse, os
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
5
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
6 parser = argparse.ArgumentParser()
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
7
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
8 parser.add_argument("-I", "--InFile", required=True, default=None, help="Input file")
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
9 parser.add_argument("-O", "--OutFile", required=True, default=None, help="Output file")
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
10 parser.add_argument("-N", "--Ngrams", required=True, default=None, help="ngrams")
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
11
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
12 args = parser.parse_args()
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
13
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
14 file = open(args.InFile)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
15 lines = file.readlines()
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
16
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
17 Index = []
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
18 Pep = []
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
19
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
20
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
21 for line in lines:
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
22 if '>' in line:
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
23 Index.append(line.strip('\n'))
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
24 else:
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
25 line = line.strip('\n')
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
26 line = line.strip('\r')
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
27 Pep.append(line)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
28
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
29 df = pd.DataFrame()
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
30
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
31 for i, l in enumerate(Pep):
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
32
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
33 D = PeptideDescriptor(l)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
34 D.count_ngrams([int(args.Ngrams)])
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
35
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
36 df1 = pd.DataFrame(D.descriptor, index=["sequence"+str(i),])
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
37 df = pd.concat([df, df1], axis=0)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
38
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
39 df = df.fillna(0)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
40 df.to_csv(args.OutFile, sep='\t', index=None)
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
41
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
42
1199b572b86c "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
43