changeset 0:eaf7c9b0a1a4 draft default tip

Uploaded
author saket-choudhary
date Tue, 07 Oct 2014 19:38:23 -0400
parents
children
files merge_columns_with_delimiter/merge_columns_with_delimiter.py merge_columns_with_delimiter/merge_columns_with_delimiter.xml merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_output.tsv merge_columns_with_delimiter/tool_dependecies.xml
diffstat 5 files changed, 146 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.py	Tue Oct 07 19:38:23 2014 -0400
@@ -0,0 +1,56 @@
+import sys, re
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit()
+
+def __main__():
+    try:
+        infile =  open ( sys.argv[1], 'r')
+        outfile = open ( sys.argv[2], 'w')
+    except:
+        stop_err( 'Cannot open or create a file\n' )
+
+    if len( sys.argv ) < 5:
+        stop_err( 'No columns to merge' )
+    else:
+        delimiter = sys.argv[3]
+        cols = sys.argv[4:]
+
+    skipped_lines = 0
+
+    char_dict = {
+        'T': '\t',
+        's': '\s',
+        'Dt': '\.',
+        'Sl': '\\',
+        'Sr': '/',
+        'C': ',',
+        'D': '-',
+        'U': '_',
+        'P': '\|',
+        'Co': ':',
+        'Sc': ';',
+        'Ep': ''
+    }
+    for line in infile:
+        line = line.rstrip( '\r\n' )
+        if line and not line.startswith( '#' ):
+            fields = line.split( '\t' )
+            line += '\t'
+            for i, col in enumerate(cols):
+                try:
+                    if i!=len(cols)-1:
+                        line += fields[ int( col ) -1 ] + char_dict[delimiter]
+                    else:
+                        line += fields[ int( col ) -1 ]
+
+                except:
+                    skipped_lines += 1
+
+            print >>outfile, line
+
+    if skipped_lines > 0:
+        print 'Skipped %d invalid lines' % skipped_lines
+
+if __name__ == "__main__" : __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.xml	Tue Oct 07 19:38:23 2014 -0400
@@ -0,0 +1,83 @@
+<tool id="merge_cols_with_delimiter1" name="Merge Columns with Delimiters" version="1.0.1">
+  <description>together</description>
+  <command interpreter="python">
+      merge_columns_with_delimiter.py
+      $input1
+      $out_file1
+      $delimiter
+      $col1
+      $col2
+      #for $col in $columns
+        ${col.datacol}
+      #end for
+
+  </command> <!-- argument order: input file, output file, delimiter code, col1, col2, then one column per "columns" repeat entry -->
+  <inputs>
+    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
+    <param name="delimiter" type="select" label="Delimiter">
+      <option value="s">Whitespaces</option>
+      <option value="T">Tabs</option>
+      <option value="Ep">EmptyString</option>
+      <option value="Dt">Dots</option>
+      <option value="C">Commas</option>
+      <option value="D">Dashes</option>
+      <option value="Sl">Left Slashes</option> <!-- NOTE(review): the script maps "Sl" to a backslash and "Sr" to "/"; confirm these labels -->
+      <option value="Sr">Right Slashes</option>
+      <option value="U">Underscores</option>
+      <option value="P">Pipes</option>
+      <option value="Co">Colons</option>
+      <option value="Sc">Semicolons</option>
+    </param> <!-- option values are the keys of char_dict in merge_columns_with_delimiter.py -->
+    <param name="col1" label="Merge column" type="data_column" data_ref="input1" />
+    <param name="col2" label="with column" type="data_column" data_ref="input1" help="Need to add more columns? Use controls below."/>
+
+    <repeat name="columns" title="Columns">
+      <param name="datacol" label="Add column" type="data_column" data_ref="input1" />
+    </repeat> <!-- each entry is emitted by the command's #for loop as an extra column argument after col2 -->
+  </inputs>
+  <outputs>
+    <data format="tabular" name="out_file1" /> <!-- referenced as $out_file1 in the command -->
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="merge_columns_with_delimiter_input.tsv"/>
+      <param name="delimiter" value="D" />
+      <param name="col1" value="5" />
+      <param name="col2" value="1" />
+      <param name="datacol" value="3" />
+      <output name="out_file1" file="merge_columns_with_delimiter_output.tsv"/>
+    </test> <!-- merges columns 5, 1 and 3 using delimiter code "D"; the expected file shows them joined with "-" as an extra last column -->
+  </tests>
+<help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**What it does**
+
+This tool merges columns together, separating them with the specified delimiter.
+Any number of valid columns can be merged in any order.
+The tool and its code were inspired by the 'merge_columns' tool distributed with Galaxy.
+
+-----
+
+**Example**
+
+Input dataset (five columns: c1, c2, c3, c4, and c5)::
+
+   1 10   1000  gene1 chr
+   2 100  1500  gene2 chr
+
+Merging columns "**c5,c1**" with "-" (dash) will return::
+
+   1 10   1000  gene1 chr chr-1
+   2 100  1500  gene2 chr chr-2
+
+.. class:: warningmark
+
+Note that all original columns are preserved and the result of the merge is added as the rightmost column.
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv	Tue Oct 07 19:38:23 2014 -0400
@@ -0,0 +1,2 @@
+1	10	1000	gene1	chr
+2	100	1500	gene2	chr
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_output.tsv	Tue Oct 07 19:38:23 2014 -0400
@@ -0,0 +1,2 @@
+1	10	1000	gene1	chr	chr-1-1000
+2	100	1500	gene2	chr	chr-2-1500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_columns_with_delimiter/tool_dependecies.xml	Tue Oct 07 19:38:23 2014 -0400
@@ -0,0 +1,3 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- empty element: no dependencies are declared -->
+</tool_dependency>