# HG changeset patch
# User saket-choudhary
# Date 1412725381 14400
# Node ID 295395b4b28f1f9720c9394014cc67888ddb8c44
Uploaded
diff -r 000000000000 -r 295395b4b28f replace_delimiter/replace_delimiter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_delimiter/replace_delimiter.py Tue Oct 07 19:43:01 2014 -0400
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+#By, Guruprasad Ananda.
+"""Replace runs of one delimiter character in a text file with another.
+
+Usage: replace_delimiter.py infile from_char to_char outfile
+
+from_char and to_char are short codes (the keys of DELIMITERS), for example
+'P' for pipe or 'C' for comma.
+"""
+
+import sys, re
+
+# Short code -> literal delimiter character written to the output.
+DELIMITERS = {
+    'T': '\t',
+    's': ' ',  # NOTE(review): assumes one space is the wanted output for 's'
+    'Dt': '.',
+    'C': ',',
+    'D': '-',
+    'U': '_',
+    'P': '|',
+    'Co': ':',
+    'Sc': ';',
+}
+
+
+def stop_err(msg):
+    """Write msg to stderr and exit with a non-zero status."""
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def main():
+    """Copy infile to outfile, replacing each run of from_char with to_char.
+
+    Reads sys.argv: infile, from_char code, to_char code, outfile. Every
+    input line is stripped of surrounding whitespace, each run of one or
+    more source delimiters becomes a single target delimiter, and the
+    result is written followed by a newline.
+    """
+    if len(sys.argv) != 5:
+        stop_err("usage: replace_delimiter.py infile from_char to_char outfile")
+
+    from_char = sys.argv[2]
+    to_char = sys.argv[3]
+    # Reject unknown codes with a clear message (not a KeyError traceback)
+    # and do it before the output file is opened and truncated.
+    for code in (from_char, to_char):
+        if code not in DELIMITERS:
+            stop_err("Unknown delimiter code '%s'; expected one of: %s"
+                     % (code, ", ".join(sorted(DELIMITERS))))
+
+    # 's' matches any whitespace on input; every other delimiter is escaped
+    # so regex metacharacters such as '.' and '|' match literally. The
+    # trailing '+' makes the pattern match one or more occurrences.
+    if from_char == 's':
+        from_ch = r'\s+'
+    else:
+        from_ch = re.escape(DELIMITERS[from_char]) + '+'
+    pattern = re.compile(from_ch)
+    # The replacement must be the bare character: regex-escaped text such as
+    # '\|' would be copied into the output together with its backslash.
+    replacement = DELIMITERS[to_char]
+
+    try:
+        fin = open(sys.argv[1], 'r')
+    except IOError:
+        stop_err("Input file cannot be opened for reading.")
+
+    try:
+        fout = open(sys.argv[4], 'w')
+    except IOError:
+        fin.close()
+        stop_err("Output file cannot be opened for writing.")
+
+    skipped = 0
+    try:
+        for line in fin:
+            line = line.strip()
+            try:
+                fout.write("%s\n" % pattern.sub(replacement, line))
+            except Exception:
+                # Best effort: count the failed line and keep going.
+                skipped += 1
+    finally:
+        # Close both files even if reading the input fails part-way.
+        fout.close()
+        fin.close()
+
+    if skipped:
+        # Parenthesised so it parses on both Python 2 and Python 3.
+        print("Skipped %d lines as invalid." % skipped)
+
+
+if __name__ == "__main__":
+    main()
diff -r 000000000000 -r 295395b4b28f replace_delimiter/replace_delimiter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_delimiter/replace_delimiter.xml Tue Oct 07 19:43:01 2014 -0400
@@ -0,0 +1,65 @@
+
+ delimiters
+ replace_delimiter.py $input $convert_from $convert_to $out_file1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Converts all delimiters of a specified type into a specified delimiter (Dots, Commas, Dashes, Underscores, Pipes, Colons, Semicolons). Tool and code borrowed from the convert_delimiter tool (distributed with Galaxy).
+
+-----
+
+**Example**
+
+- Input file::
+
+ chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
+ chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
+ chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
+ chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+
+
+- Converting all pipe delimiters of the above file to commas gives::
+
+ chrX,151283558,151283724,NM_000808_exon_8_0_chrX_151283559_r,0,-
+ chrX,151370273,151370486,NM_000808_exon_9_0_chrX_151370274_r,0,-
+ chrX,151559494,151559583,NM_018558_exon_1_0_chrX_151559495_f,0,+
+ chrX,151564643,151564711,NM_018558_exon_2_0_chrX_151564644_f,0,+
+
+
+
diff -r 000000000000 -r 295395b4b28f replace_delimiter/test-data/replace_delimiter_input.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_delimiter/test-data/replace_delimiter_input.tsv Tue Oct 07 19:43:01 2014 -0400
@@ -0,0 +1,4 @@
+chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
+chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
+chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
+chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+
diff -r 000000000000 -r 295395b4b28f replace_delimiter/test-data/replace_delimiter_output.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_delimiter/test-data/replace_delimiter_output.csv Tue Oct 07 19:43:01 2014 -0400
@@ -0,0 +1,4 @@
+chrX,151283558,151283724,NM_000808_exon_8_0_chrX_151283559_r,0,-
+chrX,151370273,151370486,NM_000808_exon_9_0_chrX_151370274_r,0,-
+chrX,151559494,151559583,NM_018558_exon_1_0_chrX_151559495_f,0,+
+chrX,151564643,151564711,NM_018558_exon_2_0_chrX_151564644_f,0,+
diff -r 000000000000 -r 295395b4b28f replace_delimiter/tool_dependecies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_delimiter/tool_dependecies.xml Tue Oct 07 19:43:01 2014 -0400
@@ -0,0 +1,3 @@
+
+
+