annotate converter.py @ 0:aae5945b961e draft

Uploaded
author mb2013
date Tue, 20 May 2014 03:23:20 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
aae5945b961e Uploaded
mb2013
parents:
diff changeset
1 # python 2.7
aae5945b961e Uploaded
mb2013
parents:
diff changeset
2 # MB
aae5945b961e Uploaded
mb2013
parents:
diff changeset
3 # Conversion of multiple .dta files to one .csv file with coordinates
aae5945b961e Uploaded
mb2013
parents:
diff changeset
4 # and one .csv file with sample names.
aae5945b961e Uploaded
mb2013
parents:
diff changeset
5
aae5945b961e Uploaded
mb2013
parents:
diff changeset
6 from optparse import OptionParser
aae5945b961e Uploaded
mb2013
parents:
diff changeset
7 from os.path import basename
aae5945b961e Uploaded
mb2013
parents:
diff changeset
8
aae5945b961e Uploaded
mb2013
parents:
diff changeset
9 # Function for merging all files in .csv format
aae5945b961e Uploaded
mb2013
parents:
diff changeset
10 # and extracting the headers of the files.
aae5945b961e Uploaded
mb2013
parents:
diff changeset
def main():
    """Merge several .dta input files into two .csv output files.

    Command-line options (parsed with optparse):
      --output      path of the .csv file that receives the coordinates
      --output2     path of the .csv file that receives the sample names
      --input_file  path of an input file; may be given several times
      --input_name  name belonging to an input file; may be given several times

    Input files and input names are paired with zip(), so only as many files
    are processed as there are names.

    The first line of every input file is used as the sample name: spaces are
    replaced by underscores and the first character and the last four
    characters are dropped (presumably a leading quote and the ".dta"
    extension -- confirm against real input files).

    Every line that splits into exactly three space-separated fields is
    written to --output as "x,y,z" with "%f" formatting.  All sample names
    are written to --output2, one per line, followed by one empty line.

    Raises:
        ValueError: if a three-field line other than the first line of a
            file contains a field that cannot be converted to float.
    """
    # extracting the users input
    parser = OptionParser()
    parser.add_option("--output")
    parser.add_option("--output2")
    parser.add_option("--input_file", action="append", default=[])  # multiple input files
    parser.add_option("--input_name", action="append", default=[])  # names of the input files
    (options, _) = parser.parse_args()

    sample_names = []
    # open every file and concatenate the files
    with open(options.output, "w") as output, \
            open(options.output2, "w") as output2:
        # The names are only used to pair up with the files; zip() keeps the
        # original behaviour of stopping at the shorter of the two lists.
        for input_file, _unused_name in zip(options.input_file, options.input_name):
            # 'with' closes the input file again; it used to be left open.
            with open(input_file, "r") as handle:
                for line_no, raw_line in enumerate(handle):
                    line = raw_line.strip()
                    fields = line.split(' ')
                    # extract name of sample
                    if line_no == 0:
                        sample_names.append(line.replace(' ', '_')[1:-4])
                    # extract the coordinates
                    if len(fields) != 3:
                        continue
                    try:
                        coords = tuple(float(field) for field in fields)
                    except ValueError:
                        if line_no == 0:
                            # A sample name that happens to consist of three
                            # words is not a coordinate line; skip it instead
                            # of aborting the whole conversion.
                            continue
                        raise
                    output.write("%f,%f,%f\n" % coords)
        # writing the sample names to the second output file; every name is
        # newline-terminated and one extra newline closes the file, exactly
        # as the original string concatenation produced.
        output2.write("%s\n" % "".join(name + "\n" for name in sample_names))
aae5945b961e Uploaded
mb2013
parents:
diff changeset
40
aae5945b961e Uploaded
mb2013
parents:
diff changeset
# Run the conversion only when this file is executed as a script, so that
# importing the module does not parse the command line or write any files.
if __name__ == "__main__":
    main()