annotate PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py @ 6:04b48352f8bf draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit e8c8198105af7eab636fb2405e5ff335539ca14b"
author jay
date Sun, 31 Jan 2021 02:12:12 +0000
parents e62e03a4d81e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
1 import glob
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
2 import pandas as pd
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
3 import sys
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
4
4
e62e03a4d81e "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 0
diff changeset
5
e62e03a4d81e "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 0
diff changeset
def MergeData(infiles, add_class_label, class_label, OutPut):
    """Merge several tab-separated files into one tab-separated output file.

    The input tables are stacked row-wise in the order they are listed.
    Columns that are missing from some inputs are filled with 0 in the
    merged table.

    Args:
        infiles: Comma-separated string of paths to tab-separated files.
            Surrounding whitespace and empty entries (for example a
            trailing comma) are ignored.
        add_class_label: Flag controlling whether a label column is added.
            Accepts the boolean ``True`` or the text "true" in any letter
            case; every other value disables labelling.
        class_label: Name of the label column. Each row receives the
            zero-based position of its source file in ``infiles``.
        OutPut: Path of the tab-separated file to write (no index column).

    Raises:
        ValueError: If ``infiles`` contains no file path.
    """
    # The flag normally arrives as text from the command line, while the
    # argparse default is the boolean False; normalising through str()
    # handles both spellings with a single comparison.
    label_rows = str(add_class_label).strip().lower() == 'true'

    paths = [entry.strip() for entry in infiles.split(',') if entry.strip()]
    if not paths:
        raise ValueError("infiles must list at least one input file")

    # Collect the frames and concatenate once: calling pd.concat inside
    # the loop re-copies all accumulated rows on every iteration.
    frames = []
    for position, path in enumerate(paths):
        frame = pd.read_csv(path, sep='\t')
        if label_rows:
            frame[class_label] = position
        frames.append(frame)

    merged = pd.concat(frames).fillna(0)
    merged.to_csv(OutPut, sep="\t", index=False)
0
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
26
3f9bdcf83244 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff changeset
27
4
e62e03a4d81e "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents: 0
diff changeset
if __name__ == "__main__":

    import argparse

    # Command-line entry point: parse the options and hand them to MergeData.
    parser = argparse.ArgumentParser()
    parser.add_argument("-I", "--infiles", required=True,
                        help="Comma-separated list of input tsv files")
    # The flag is compared as text inside MergeData, so it is kept as a
    # plain value here rather than converted to a boolean.
    parser.add_argument("-L", "--add_class_label", required=False, default=False,
                        help="Pass 'true' to add a class label column to the merged table")
    parser.add_argument("-C", "--class_label", required=False, default='class_label',
                        help="Name of the class label column")
    parser.add_argument("-O", "--OutPut", required=False, default='Out.tsv',
                        help="Path to target tsv file")

    args = parser.parse_args()

    MergeData(args.infiles, args.add_class_label, args.class_label, args.OutPut)