0
|
1 # python 2.7
|
|
2 # MB
|
|
# Conversion of multiple .dta files to one .csv file with coordinates
# and one .csv file with sample names.
|
|
5
|
|
6 from optparse import OptionParser
|
|
7 from os.path import basename
|
|
8
|
|
# Merges the coordinates of all input files into one .csv file
# and extracts the header (first line) of each file as its sample name.
|
|
def main():
    """Merge several input files into a coordinates .csv and a names .csv.

    Command-line options:
      --output      path of the .csv file that receives the coordinates
      --output2     path of the .csv file that receives the sample names
      --input_file  path of an input file (may be given several times)
      --input_name  name of an input file (may be given several times);
                    it is only used to pair up with --input_file, the
                    sample name that is written comes from the first line
                    of the file itself

    For every input file the first line is turned into a sample name
    (spaces replaced by underscores, first character and last four
    characters dropped).  Every line that consists of exactly three
    space-separated fields is written to --output as "%f,%f,%f".

    Exits with a usage error when --output or --output2 is missing.
    Raises ValueError when a three-field line is not numeric.
    """
    # extracting the users input
    parser = OptionParser()
    parser.add_option("--output")
    parser.add_option("--output2")
    parser.add_option("--input_file", action="append", default=[])  # multiple inputfiles
    parser.add_option("--input_name", action="append", default=[])  # names of multiple inputfiles
    (options, _) = parser.parse_args()

    # Fail with a readable usage message instead of an error from open(None).
    if not options.output or not options.output2:
        parser.error("--output and --output2 are required")

    sample_names = []
    with open(options.output, "w") as output, \
            open(options.output2, "w") as output2:
        # zip() keeps the original pairing: an input file without a matching
        # --input_name (or the other way round) is skipped.
        for input_file, _input_name in zip(options.input_file, options.input_name):
            # The with-block closes every input file as soon as it is read.
            with open(input_file, "r") as handle:
                for line_number, line in enumerate(handle):
                    line = line.strip()
                    fields = line.split(' ')
                    # extract name of sample from the first line
                    if line_number == 0:
                        sample_names.append(line.replace(' ', '_')[1:-4])
                    # extract the coordinates; the first line is checked as
                    # well, exactly as before
                    if len(fields) == 3:
                        output.write("%f,%f,%f\n" % tuple(float(field) for field in fields))
        # One name per line, followed by one extra newline (unchanged format).
        output2.write("%s\n" % "".join("%s\n" % name for name in sample_names))
|
|
40
|
|
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|