Mercurial > repos > saket-choudhary > replace_delimiters
changeset 0:295395b4b28f draft default tip
Uploaded
author | saket-choudhary |
---|---|
date | Tue, 07 Oct 2014 19:43:01 -0400 |
parents | |
children | |
files | replace_delimiter/replace_delimiter.py replace_delimiter/replace_delimiter.xml replace_delimiter/test-data/replace_delimiter_input.tsv replace_delimiter/test-data/replace_delimiter_output.csv replace_delimiter/tool_dependecies.xml |
diffstat | 5 files changed, 131 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# By, Guruprasad Ananda.
# File: replace_delimiter/replace_delimiter.py
"""Convert every run of one delimiter in a text file into another delimiter.

Usage: replace_delimiter.py infile from_char to_char outfile

``from_char`` and ``to_char`` are the short codes listed in ``DELIMITERS``.
Each run of one or more consecutive source delimiters is replaced by a
single target delimiter.
"""

import re
import sys

# Short code (as passed on the command line) -> literal delimiter text.
# These are plain characters, NOT regex fragments: they are written to the
# output verbatim, so they must not carry regex escapes such as "\." or "\|".
DELIMITERS = {
    'T': '\t',
    's': ' ',
    'Dt': '.',
    'C': ',',
    'D': '-',
    'U': '_',
    'P': '|',
    'Co': ':',
    'Sc': ';',
}


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    # Exit status 1 so callers can tell the failure apart from a normal run.
    sys.exit(1)


def _compile_source_pattern(code):
    """Return a compiled regex matching one or more consecutive *code* delimiters."""
    if code == 's':
        # "Whitespaces" as a source means any whitespace character.
        return re.compile(r'\s+')
    # re.escape keeps regex metacharacters such as "." and "|" literal.
    return re.compile(re.escape(DELIMITERS[code]) + '+')


def main():
    """Read the four command-line arguments and convert the input file.

    Exits through ``stop_err`` when the argument count is wrong, a delimiter
    code is unknown, or either file cannot be opened. Prints a summary line
    if any lines had to be skipped.
    """
    if len(sys.argv) != 5:
        stop_err("usage: convert_characters infile from_char to_char outfile")

    in_path, from_char, to_char, out_path = sys.argv[1:]

    # Validate the codes before touching any file, so a bad code neither
    # creates an empty output file nor surfaces as a KeyError traceback.
    for code in (from_char, to_char):
        if code not in DELIMITERS:
            stop_err("Unknown delimiter code '%s'; expected one of: %s"
                     % (code, ", ".join(sorted(DELIMITERS))))

    pattern = _compile_source_pattern(from_char)
    # Contains no backslashes, so re.sub inserts it exactly as written.
    replacement = DELIMITERS[to_char]

    try:
        fin = open(in_path, 'r')
    except (IOError, OSError):
        stop_err("Input file cannot be opened for reading.")

    try:
        fout = open(out_path, 'w')
    except (IOError, OSError):
        fin.close()
        stop_err("Output file cannot be opened for writing.")

    skipped = 0
    with fin, fout:
        for line in fin:
            # Leading/trailing whitespace is dropped, as the tool always did.
            line = line.strip()
            try:
                fout.write("%s\n" % pattern.sub(replacement, line))
            except (re.error, UnicodeError):
                # Best effort: count the line and carry on with the rest.
                skipped += 1

    if skipped:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Skipped %d lines as invalid." % skipped)


if __name__ == "__main__":
    main()
<!-- File: replace_delimiter/replace_delimiter.xml -->
<!-- Galaxy tool wrapper for replace_delimiter.py. -->
<tool id="convertdelimiter" name="Convert">
    <description>delimiters</description>
    <!-- Positional arguments: input dataset, source delimiter code, target delimiter code, output dataset. -->
    <command interpreter="python">replace_delimiter.py $input $convert_from $convert_to $out_file1</command>
    <inputs>
        <!-- Option values are the short codes passed to replace_delimiter.py. -->
        <param name="convert_from" type="select" label="Convert all">
            <option value="s">Whitespaces</option>
            <option value="T">Tabs</option>
            <option value="Dt">Dots</option>
            <option value="C">Commas</option>
            <option value="D">Dashes</option>
            <option value="U">Underscores</option>
            <option value="P">Pipes</option>
            <option value="Co">Colons</option>
            <option value="Sc">Semicolons</option>
        </param>
        <param format="txt" name="input" type="data" label="in Dataset"/>
        <param name="convert_to" type="select" label="to ">
            <option value="s">Whitespaces</option>
            <option value="T">Tabs</option>
            <option value="Dt">Dots</option>
            <option value="C">Commas</option>
            <option value="D">Dashes</option>
            <option value="U">Underscores</option>
            <option value="P">Pipes</option>
            <option value="Co">Colons</option>
            <option value="Sc">Semicolons</option>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" name="out_file1" />
    </outputs>
    <tests>
        <!-- Pipes to commas; the fixture contains runs of pipes that collapse to one comma. -->
        <test>
            <param name="convert_from" value="P"/>
            <param name="input" value="replace_delimiter_input.tsv" ftype="tabular"/>
            <param name="convert_to" value="C"/>
            <output name="out_file1" file="replace_delimiter_output.csv"/>
        </test>
    </tests>
    <help>

**What it does**

Converts all delimiters of a specified type into another specified delimiter (Whitespaces, Tabs, Dots, Commas, Dashes, Underscores, Pipes, Colons, Semicolons). Runs of consecutive delimiters are replaced by a single delimiter.
Tool and code borrowed from convert_delimiter tool (distributed with Galaxy).

-----

**Example**

- Input file::

    chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|-
    chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|-
    chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+
    chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+

- Converting all pipe delimiters of the above file to Commas will get::

    chrX,151283558,151283724,NM_000808_exon_8_0_chrX_151283559_r,0,-
    chrX,151370273,151370486,NM_000808_exon_9_0_chrX_151370274_r,0,-
    chrX,151559494,151559583,NM_018558_exon_1_0_chrX_151559495_f,0,+
    chrX,151564643,151564711,NM_018558_exon_2_0_chrX_151564644_f,0,+

</help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_delimiter/test-data/replace_delimiter_input.tsv Tue Oct 07 19:43:01 2014 -0400 @@ -0,0 +1,4 @@ +chrX||151283558|151283724|NM_000808_exon_8_0_chrX_151283559_r|0|- +chrX|151370273|151370486|NM_000808_exon_9_0_chrX_151370274_r|0|- +chrX|151559494|151559583|NM_018558_exon_1_0_chrX_151559495_f|0|+ +chrX|151564643|151564711|NM_018558_exon_2_0_chrX_151564644_f||||0|+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_delimiter/test-data/replace_delimiter_output.csv Tue Oct 07 19:43:01 2014 -0400 @@ -0,0 +1,4 @@ +chrX,151283558,151283724,NM_000808_exon_8_0_chrX_151283559_r,0,- +chrX,151370273,151370486,NM_000808_exon_9_0_chrX_151370274_r,0,- +chrX,151559494,151559583,NM_018558_exon_1_0_chrX_151559495_f,0,+ +chrX,151564643,151564711,NM_018558_exon_2_0_chrX_151564644_f,0,+