comparison PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py @ 4:0b17bc2ddcdd draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
author jay
date Tue, 12 Jan 2021 19:50:39 +0000
parents 0973f093d98f
children
comparison
equal deleted inserted replaced
3:bda0527365da 4:0b17bc2ddcdd
1 import glob 1 import glob
2 import pandas as pd 2 import pandas as pd
3 import sys 3 import sys
4 4
5 files = sys.argv[1] 5
6 out_file = sys.argv[2] 6 def MergeData(infiles, add_class_label, class_label, OutPut):
7
8 data_frame = pd.DataFrame()
9
10 if add_class_label == 'True' or add_class_label == 'true':
11 for i, file in enumerate(infiles.split(',')):
12 df1 = pd.read_csv(file,sep='\t')
13 df2 = pd.DataFrame(df1.shape[0]*[i], columns=[class_label])
14 df3 = pd.concat([df1,df2], axis=1)
15 data_frame = pd.concat([data_frame,df3])
16 final_DF = data_frame.fillna(0)
17
18 else:
19
20 for file in infiles.split(','):
21 df1 = pd.read_csv(file,sep='\t')
22 data_frame = pd.concat([data_frame,df1])
23 final_DF = data_frame.fillna(0)
24
25 final_DF.to_csv(OutPut, sep="\t", index=False)
7 26
8 27
9 data_frame = pd.read_csv(files.split(',')[0],sep='\t') 28 if __name__=="__main__":
10 29
30 import argparse
31 parser = argparse.ArgumentParser()
32 parser.add_argument("-I", "--infiles", required=True, default=None, help=".tsv")
33 parser.add_argument("-L", "--add_class_label", required=False, default=False, help="Path to target tsv file")
34 parser.add_argument("-C", "--class_label", required=False, default='class_label', help="Path to target tsv file")
35 parser.add_argument("-O", "--OutPut", required=False, default='Out.tsv', help="Path to target tsv file")
11 36
12 for file in files.split(',')[1:]: 37 args = parser.parse_args()
13 38
14 df1 = pd.read_csv(file,sep='\t') 39 MergeData(args.infiles, args.add_class_label, args.class_label, args.OutPut)
15 data_frame = pd.concat([data_frame,df1])
16
17 final_DF = data_frame.fillna(0)
18
19 final_DF.to_csv(out_file,sep="\t", index=False)
20
21
22
23
24
25
26
27
28