comparison bionumeric_converter.py @ 1:07dfb8fd47f4 draft default tip

planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author nml
date Mon, 13 May 2019 12:59:15 -0400
parents b000a3130db8
children
comparison
legend: equal | deleted | inserted | replaced
left column: 0:b000a3130db8 (parent) | right column: 1:07dfb8fd47f4 (this changeset)
12 parser = argparse.ArgumentParser() 12 parser = argparse.ArgumentParser()
13 parser.add_argument( 13 parser.add_argument(
14 '-f', 14 '-f',
15 '--filename', 15 '--filename',
16 required=True, 16 required=True,
17 help='Specify your tsv input') 17 help='Specify your biohansel tsv or other tabular separated input')
18 parser.add_argument( 18 parser.add_argument(
19 '-o', 19 '-o',
20 '--output', 20 '--output',
21 default='output.csv', 21 default='output.csv',
22 help='Specify output name') 22 help='Specify output name')
23 args = parser.parse_args() 23 args = parser.parse_args()
24 tsv_file = args.filename 24 tsv_file = args.filename
25 out_name = args.output 25 out_name = args.output
26 26
27 no_comma_tsv = comma_remover(tsv_file) 27 df_input = pd.read_csv(tsv_file, sep='\t')
28 df = qc_shortener(no_comma_tsv) 28
29 df_no_comma = df_input.replace(',', '/', regex=True)
30 df = qc_shortener(df_no_comma)
29 df.to_csv(out_name, index=False) 31 df.to_csv(out_name, index=False)
30
31 # Remove comma function:
32
33
34 def comma_remover(tsv_file):
35 # Create a table from the tsv file as an input into the dataframe.
36 df = pd.read_csv(tsv_file, sep='\t')
37 # Change all commas to / in the QC message
38 no_comma_tsv = df.replace(',', '/', regex=True)
39 return no_comma_tsv
40 32
41 # Shorten QC results: 33 # Shorten QC results:
42 34
43 35
36 def splittingstrings(string, length):
37 return (string[0+i:length+i] for i in range(0, len(string), length))
38
39
44 def qc_shortener(df): 40 def qc_shortener(df):
45 for count in df.index: 41 for i, row in df.iterrows():
46 message = str(df.at[count, 'qc_message']) 42 message = str(row['qc_message'])
47 if len(message) > 150: 43 if len(message) > 150:
48 results = message.find('|') 44 message_list = list(splittingstrings(message, 150))
49 new_message = "Truncated after first '|' : " + message[0:results] 45 df.at[i, 'qc_message'] = message_list[0]
50 df['qc_message'] = df['qc_message'].replace(message, new_message) 46 for val in range(1, len(message_list)):
47 df.at[i, 'qc_message_{}'.format(val)] = message_list[val]
51 return df 48 return df
52 49
53 50
54 if __name__ == '__main__': 51 if __name__ == '__main__':
55 main() 52 main()