Mercurial > repos > jay > pdaug_word_vector_descriptor
annotate PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py @ 6:2c598f028319 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit e8c8198105af7eab636fb2405e5ff335539ca14b"
author | jay |
---|---|
date | Sun, 31 Jan 2021 02:41:29 +0000 |
parents | a3a1d9bea1ad |
children |
rev | line source |
---|---|
0
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
1 from modlamp.core import BaseDescriptor |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
2 from modlamp.descriptors import PeptideDescriptor |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
3 import pandas as pd |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
4 import argparse, os |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
5 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
6 parser = argparse.ArgumentParser() |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
7 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
8 parser.add_argument("-I", "--InFile", required=True, default=None, help="Input file") |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
9 parser.add_argument("-O", "--OutFile", required=True, default=None, help="Output file") |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
10 parser.add_argument("-N", "--Ngrams", required=True, default=None, help="ngrams") |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
11 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
12 args = parser.parse_args() |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
13 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
14 file = open(args.InFile) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
15 lines = file.readlines() |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
16 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
17 Index = [] |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
18 Pep = [] |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
19 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
20 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
21 for line in lines: |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
22 if '>' in line: |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
23 Index.append(line.strip('\n')) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
24 else: |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
25 line = line.strip('\n') |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
26 line = line.strip('\r') |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
27 Pep.append(line) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
28 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
29 df = pd.DataFrame() |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
30 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
31 for i, l in enumerate(Pep): |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
32 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
33 D = PeptideDescriptor(l) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
34 D.count_ngrams([int(args.Ngrams)]) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
35 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
36 df1 = pd.DataFrame(D.descriptor, index=["sequence"+str(i),]) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
37 df = pd.concat([df, df1], axis=0) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
38 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
39 df = df.fillna(0) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
40 df.to_csv(args.OutFile, sep='\t', index=None) |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
41 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
42 |
a3a1d9bea1ad
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
43 |