Galaxy |

Changeset 0:aae5945b961e (2014-05-20)

Next changeset 1:6e7cf5ed7d62 (2014-05-20)

Commit message:
Uploaded

added:
converter.py

diff -r 000000000000 -r aae5945b961e converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/converter.py Tue May 20 03:23:20 2014 -0400

[

@@ -0,0 +1,41 @@
+# python 2.7
+# MB
+# Converts multiple .dta files into one .csv file with coordinates
+# and one .csv file with sample names.
+
+from optparse import OptionParser
+from os.path import basename
+
+# Function that merges all input files into one .csv file
+# and extracts the header (first line) of each file.
+def main():
+    # extracting the users input
+    parser = OptionParser()
+    parser.add_option("--output")
+    parser.add_option("--output2")
+    parser.add_option("--input_file", action="append", default=[]) # multiple inputfiles
+    parser.add_option("--input_name", action="append", default=[]) # names of multiple inputfiles
+    (options, _) = parser.parse_args()
+
+
+    header = ""
+    header_temp = ""
+    # open every file and concatenate the files
+    with(open(options.output, "w")) as output:
+ with(open(options.output2, "w")) as output2:
+            for i, (input_file, input_name) in enumerate(zip(options.input_file, options.input_name)):
+ # open a input file
+             for j, line in enumerate(open(input_file, "r").readlines()):
+                    line = line.strip()
+     split_tabs = line.split('  ')
+     number_columns = len(split_tabs)
+     # extract name of sample
+         if j == 0:
+                     header_temp = "%s" %(line.replace(' ', '_'))
+ header += "%s\n"%(header_temp[1:-4])
+     # extract the coordinates
+     if number_columns == 3:
+                     output.write("%f,%f,%f\n"%(float(split_tabs[0]),float(split_tabs[1]),float(split_tabs[2])))
+     output2.write("%s\n"%(header)) # writing header to output file     #name of sample
+
+main()