annotate PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 6:1fafae68b9bb draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 32b9c48c81639a81be24bb3e2f48dc0a81c0deca"
author jay
date Sun, 09 Jan 2022 03:41:43 +0000
parents 2f9d9e6d91ce
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
1
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
2 import pandas as pd
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
3
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
4
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps):
    """Convert a tab-separated peptide table into one or two FASTA files.

    Args:
        infile: Path to the tab-separated input file (first row is the header).
        method: ``"withoutlabel"`` writes every row to ``outfile``;
            ``"withlabel"`` splits the rows by class label and writes them to
            ``firstdatafile`` and ``seconddatafile``.
        firstdatafile: Output path for the first class (``"withlabel"`` only).
        seconddatafile: Output path for the second class (``"withlabel"`` only).
        outfile: Output path used by ``"withoutlabel"``.
        clmpepid: Name of the column holding record identifiers, or ``None``
            to use the 0-based row position as the identifier.
        slcclasslabel: Name of the class-label column (``"withlabel"`` only).
        peps: Name of the column holding the peptide sequences.

    Raises:
        ValueError: If ``method`` is not recognised, or if the label column
            holds more distinct values than there are output files.
    """
    if method not in ("withoutlabel", "withlabel"):
        raise ValueError(
            "Unknown method %r: expected 'withoutlabel' or 'withlabel'" % (method,)
        )

    df = pd.read_csv(infile, sep="\t")

    # Without an ID column the FASTA header falls back to the row position.
    if clmpepid is None:
        idents = list(range(len(df)))
    else:
        idents = df[clmpepid].tolist()
    sequences = df[peps].tolist()

    if method == "withoutlabel":
        with open(outfile, "w") as handle:
            for ident, seq in zip(idents, sequences):
                # str() so numeric ID columns work as well as text ones.
                handle.write(">" + str(ident) + "\n")
                # The sequence is deliberately not coerced: a non-string
                # (e.g. a missing value) should fail rather than be written
                # out as the text "nan".
                handle.write(seq + "\n")
        return

    labels = df[slcclasslabel].tolist()

    # Sort the distinct labels so the class -> output file mapping is
    # reproducible between runs (plain set iteration order is arbitrary for
    # strings). Labels of mixed, non-comparable types are ordered by their
    # text form instead.
    distinct = set(labels)
    try:
        classes = sorted(distinct)
    except TypeError:
        classes = sorted(distinct, key=str)

    targets = [firstdatafile, seconddatafile]
    if len(classes) > len(targets):
        # Fail before touching any output file rather than half-way through.
        raise ValueError(
            "Column %r has %d distinct labels %r but only %d output files are available"
            % (slcclasslabel, len(classes), classes, len(targets))
        )

    for path, cls in zip(targets, classes):
        with open(path, "w") as handle:
            for ident, seq, label in zip(idents, sequences, labels):
                if label == cls:
                    handle.write(">" + str(ident) + "_" + str(cls) + "\n")
                    handle.write(seq + "\n")
0
e650de82bcc7 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
54
e650de82bcc7 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
if __name__ == "__main__":

    # Command-line entry point: parse the options and run the conversion.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert a tab-separated peptide table into FASTA file(s)."
    )
    parser.add_argument("-I", "--InFile", required=True,
                        help="Path to the input tab-separated (.tsv) file")
    parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta',
                        help="Output FASTA path for the first class (method 'withlabel')")
    parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta',
                        help="Output FASTA path for the second class (method 'withlabel')")
    parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta',
                        help="Output FASTA path (method 'withoutlabel')")
    # Restricting the choices makes a mistyped method fail at parse time
    # instead of running to completion without writing any output.
    parser.add_argument("-M", "--Method", required=True, choices=("withoutlabel", "withlabel"),
                        help="'withoutlabel' writes one FASTA file; 'withlabel' splits rows by class label")
    parser.add_argument("-C", "--ClmPepID", required=False, default=None,
                        help="Peptide ID column name (row position is used when omitted)")
    parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label",
                        help="Class label column name")
    parser.add_argument("-P", "--PeptideColumn", required=True,
                        help="Peptide sequence column name")
    args = parser.parse_args()

    TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile,
               args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn)
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
70
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
71
2f9d9e6d91ce "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 2
diff changeset
72