# HG changeset patch
# User saket-choudhary
# Date 1412725103 14400
# Node ID eaf7c9b0a1a4aa0aa73337ec0621f1089334cd61
# Uploaded
#
# diff -r 000000000000 -r eaf7c9b0a1a4 merge_columns_with_delimiter/merge_columns_with_delimiter.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.py Tue Oct 07 19:38:23 2014 -0400
#
# The script below was recovered from a whitespace-collapsed copy of the
# changeset above and reformatted; the other files of the changeset are kept
# as a reference comment at the bottom of this file.
"""Append a merged column to every row of a tab-delimited file.

Usage::

    merge_columns_with_delimiter.py INPUT OUTPUT DELIMITER_CODE COL [COL ...]

``DELIMITER_CODE`` is one of the keys of ``_DELIMITERS`` (for example ``D``
for a dash or ``T`` for a tab).  Each ``COL`` is a 1-based column number.
The selected columns are joined, in the order given, with the chosen
delimiter, and the result is appended to the row as a new rightmost column.
All original columns are preserved.

Example (``DELIMITER_CODE`` = ``D``, columns ``5 1 3``)::

    1  10   1000  gene1  chr   ->   1  10   1000  gene1  chr  chr-1-1000
    2  100  1500  gene2  chr   ->   2  100  1500  gene2  chr  chr-2-1500
"""
import re  # not used by this script; kept because the original imports it
import sys

# Maps the delimiter code received on the command line to the literal text
# written between merged values.  The values are plain strings, not regular
# expressions: the original table used '\s', '\.' and '\|', which put a
# backslash into the output instead of a space, a dot and a pipe.
_DELIMITERS = {
    'T': '\t',
    's': ' ',
    'Dt': '.',
    'Sl': '\\',
    'Sr': '/',
    'C': ',',
    'D': '-',
    'U': '_',
    'P': '|',
    'Co': ':',
    'Sc': ';',
    'Ep': '',
}


def stop_err(msg):
    """Write *msg* to stderr and terminate the process with exit status 1.

    Raises:
        SystemExit: always.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit(1)


def _parse_columns(col_args):
    """Convert 1-based column arguments into 0-based list indices.

    Raises:
        ValueError: if an argument is not an integer or is smaller than 1.
    """
    indices = []
    for arg in col_args:
        number = int(arg)
        if number < 1:
            # Without this check column 0 would become index -1 and silently
            # select the last field of every row.
            raise ValueError('column numbers start at 1, got %r' % arg)
        indices.append(number - 1)
    return indices


def _merge_fields(fields, indices, delimiter):
    """Join the requested fields of one row.

    Returns:
        A ``(merged, complete)`` tuple.  ``merged`` joins the requested
        fields that exist in *fields*; ``complete`` is False when at least
        one requested column is missing from the row.
    """
    present = [fields[i] for i in indices if i < len(fields)]
    return delimiter.join(present), len(present) == len(indices)


def _merge_stream(infile, outfile, indices, delimiter):
    """Copy data rows from *infile* to *outfile*, appending the merged column.

    Returns:
        The number of rows in which at least one requested column was missing.
    """
    incomplete = 0
    for line in infile:
        line = line.rstrip('\r\n')
        # NOTE(review): indentation was lost in the recovered diff.  This
        # assumes the original wrote the row inside the `if`, i.e. blank lines
        # and lines starting with '#' are dropped from the output -- confirm.
        if not line or line.startswith('#'):
            continue
        merged, complete = _merge_fields(line.split('\t'), indices, delimiter)
        if not complete:
            incomplete += 1
        # Rows with missing columns are still written (best effort), merged
        # from whichever requested columns exist.
        outfile.write('%s\t%s\n' % (line, merged))
    return incomplete


def __main__():
    """Command-line entry point; reads its arguments from ``sys.argv``.

    ``sys.argv[1]`` is the input path, ``sys.argv[2]`` the output path,
    ``sys.argv[3]`` the delimiter code and ``sys.argv[4:]`` the 1-based
    column numbers.  All arguments are validated before any file is opened,
    so a usage error never truncates an existing output file.

    Raises:
        SystemExit: with status 1 (via ``stop_err``) on too few arguments, an
            unknown delimiter code, an invalid column number, or a file that
            cannot be opened.
    """
    if len(sys.argv) < 5:
        stop_err('No columns to merge')

    delimiter_code = sys.argv[3]
    if delimiter_code not in _DELIMITERS:
        stop_err('Unknown delimiter code %r\n' % delimiter_code)
    delimiter = _DELIMITERS[delimiter_code]

    try:
        indices = _parse_columns(sys.argv[4:])
    except ValueError as exc:
        stop_err('Invalid column number: %s\n' % exc)

    try:
        infile = open(sys.argv[1], 'r')
    except (IOError, OSError):
        stop_err('Cannot open or create a file\n')

    with infile:
        try:
            outfile = open(sys.argv[2], 'w')
        except (IOError, OSError):
            stop_err('Cannot open or create a file\n')
        with outfile:
            skipped_lines = _merge_stream(infile, outfile, indices, delimiter)

    if skipped_lines > 0:
        # Counted once per row, not once per missing column.  The rows are
        # still present in the output, merged from the columns that exist.
        sys.stdout.write('Skipped %d invalid lines\n' % skipped_lines)


if __name__ == "__main__":
    __main__()


# ---------------------------------------------------------------------------
# Remainder of the changeset, kept for reference.  The XML markup of the tool
# wrapper and of tool_dependecies.xml was already missing from the recovered
# copy; only the text content below survived.
#
# diff -r 000000000000 -r eaf7c9b0a1a4 merge_columns_with_delimiter/merge_columns_with_delimiter.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.xml Tue Oct 07 19:38:23 2014 -0400
# @@ -0,0 +1,83 @@
#
#   together
#
#   merge_columns_with_delimiter.py
#       $input1
#       $out_file1
#       $delimiter
#       $col1
#       $col2
#       #for $col in $columns
#           ${col.datacol}
#       #end for
#
#   .. class:: infomark
#
#   **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*
#
#   -----
#
#   **What it does**
#
#   This tool merges columns together, separating them with the specified delimiter.
#   Any number of valid columns can be merged in any order.
#   Tool and code inspired by the 'merge_columns' tool distributed with Galaxy.
#
#   -----
#
#   **Example**
#
#   Input dataset (five columns: c1, c2, c3, c4, and c5)::
#
#       1 10 1000 gene1 chr
#       2 100 1500 gene2 chr
#
#   merging columns "**c5,c1**" with "-" (dash) will return::
#
#       1 10 1000 gene1 chr chr-1
#       2 100 1500 gene2 chr chr-2
#
#   .. class:: warningmark
#
#   Note that all original columns are preserved and the result of merge is
#   added as the rightmost column.
#
# diff -r 000000000000 -r eaf7c9b0a1a4 merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv Tue Oct 07 19:38:23 2014 -0400
# @@ -0,0 +1,2 @@
# +1 10 1000 gene1 chr
# +2 100 1500 gene2 chr
#   (field separators are presumably tabs in the real fixture -- the
#   recovered copy shows single spaces)
#
# diff -r 000000000000 -r eaf7c9b0a1a4 merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_output.tsv
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_output.tsv Tue Oct 07 19:38:23 2014 -0400
# @@ -0,0 +1,2 @@
# +1 10 1000 gene1 chr chr-1-1000
# +2 100 1500 gene2 chr chr-2-1500
#
# diff -r 000000000000 -r eaf7c9b0a1a4 merge_columns_with_delimiter/tool_dependecies.xml
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/tool_dependecies.xml Tue Oct 07 19:38:23 2014 -0400
# @@ -0,0 +1,3 @@
#   (three added lines; no text content survived in the recovered copy)