Repository 'rearrange_columns'
hg clone https://toolshed.g2.bx.psu.edu/repos/immport-devteam/rearrange_columns

Changeset 0:d2cdffd27293 (2017-02-27)
Next changeset 1:ce206587d42f (2020-07-16)
Commit message:
Uploaded
added:
rearrange_columns/editColumnHeadings.py
rearrange_columns/editColumnHeadings.xml
rearrange_columns/test-data/input1.txt
rearrange_columns/test-data/input2.txt
rearrange_columns/test-data/input3.txt
rearrange_columns/test-data/output1.flowtext
rearrange_columns/test-data/output2.flowtext
rearrange_columns/test-data/output3.flowtext
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/editColumnHeadings.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/editColumnHeadings.py Mon Feb 27 13:05:18 2017 -0500
[
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+
+from __future__ import print_function
+import sys
+
+from argparse import ArgumentParser
+
+
+def is_integer(s):
+    """Report whether ``int(s)`` succeeds.
+
+    Returns True when the conversion works and False when it raises
+    ValueError; any other exception is left to propagate.
+    """
+    try:
+        int(s)
+    except ValueError:
+        return False
+    else:
+        return True
+
+
+def rearrange_file(input_file, col_order, col_names, output_file):
+    """Copy a tab-separated file, keeping, reordering and/or renaming columns.
+
+    The first line of input_file is treated as the header row; every other
+    line is treated as a data row.
+
+    input_file  -- path of the tab-separated file to read.
+    col_order   -- zero-based indices of the columns to keep, in output
+                   order; an empty list keeps every column where it is.
+    col_names   -- replacement headings, one per output column; an empty
+                   list keeps the existing headings, and a blank entry keeps
+                   the existing heading of that one column.
+    output_file -- path of the file to write.
+
+    Exits with status 3 when col_order refers to a column the header row
+    does not have, and with status 4 when the number of entries in
+    col_names differs from the number of output columns. A data row with
+    fewer fields than a selected index raises IndexError.
+    """
+    with open(input_file, "r") as infl, open(output_file, "w") as outf:
+        # Trim the line ending and space padding only: a bare strip() would
+        # also eat leading/trailing tabs and so drop empty edge fields.
+        current_hdrs = infl.readline().strip(" \r\n").split("\t")
+        nb_cols = len(current_hdrs)
+
+        # A negative index would silently count from the last column and a
+        # too-large one would crash with a traceback, so reject both here.
+        for idx in col_order:
+            if not 0 <= idx < nb_cols:
+                sys.stderr.write("Column " + str(idx + 1) + " was requested but the file has " + str(nb_cols) + " columns\n")
+                sys.exit(3)
+
+        # One heading is expected per output column.
+        if col_names:
+            if col_order:
+                if len(col_order) != len(col_names):
+                    sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n")
+                    sys.exit(4)
+            elif len(col_names) != nb_cols:
+                sys.stderr.write("There are " + str(nb_cols) + " columns but " + str(len(col_names)) + " marker names were provided\n")
+                sys.exit(4)
+
+        # Resolve the default (all columns, original order) once, up front.
+        kept = list(col_order) if col_order else list(range(nb_cols))
+
+        # headers
+        new_hdrs = [current_hdrs[k] for k in kept]
+        if col_names:
+            # A blank replacement falls back to the heading already there.
+            new_hdrs = [name.strip() or old
+                        for name, old in zip(col_names, new_hdrs)]
+        outf.write("\t".join(new_hdrs) + "\n")
+
+        # columns
+        for line in infl:
+            cols = line.strip(" \r\n").split("\t")
+            outf.write("\t".join([cols[k] for k in kept]) + "\n")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, turn the 1-based column
+    # list and the heading list into Python lists, then rewrite the file.
+    parser = ArgumentParser(
+             prog="editColumnHeadings",
+             description="Cut, rearrange and rename columns in a tab-separated file.")
+
+    parser.add_argument(
+            '-i',
+            dest="input_file",
+            required=True,
+            help="File location for the text file.")
+
+    parser.add_argument(
+            '-c',
+            dest="columns",
+            help="Columns to keep in the order to keep them in.")
+
+    parser.add_argument(
+            '-n',
+            dest="column_names",
+            help="Column names if renaming.")
+
+    parser.add_argument(
+            '-o',
+            dest="output_file",
+            required=True,
+            help="Name of the output file.")
+
+    args = parser.parse_args()
+
+    # check column indices
+    # These placeholder values mean "no selection": keep every column.
+    default_value_col = ["i.e.:1,5,2", "default", "Default"]
+    col_order = []
+    if args.columns and args.columns not in default_value_col:
+        tmp_col = [c.strip() for c in args.columns.split(",")]
+        # A single bad entry exits with status 2, a bad entry inside a
+        # comma-separated list with status 3.
+        bad_status = 2 if len(tmp_col) == 1 else 3
+        # A lone blank entry also means "keep every column".
+        if tmp_col != [""]:
+            for c in tmp_col:
+                # Column numbers are 1-based: 0 or a negative number would
+                # become a negative index and silently pick a column
+                # counted from the end, so treat it like a non-integer.
+                if not is_integer(c) or int(c) < 1:
+                    sys.exit(bad_status)
+                col_order.append(int(c) - 1)
+
+    # check column names
+    # These placeholder values mean "no renaming".
+    default_value_nms = ["i.e.:Marker1,,Marker4", "default", "Default"]
+    col_names = []
+    if args.column_names and args.column_names not in default_value_nms:
+        col_names = args.column_names.split(",")
+        # With an explicit selection there must be one heading per column.
+        if col_order and len(col_order) != len(col_names):
+            sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n")
+            sys.exit(4)
+
+    rearrange_file(args.input_file, col_order, col_names, args.output_file)
+
+    sys.exit(0)
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/editColumnHeadings.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/editColumnHeadings.xml Mon Feb 27 13:05:18 2017 -0500
[
@@ -0,0 +1,134 @@
+<tool id="edit_rearrange_columns" name="Remove, rearrange and/or rename columns" version="1.1">
+  <description>in txt-converted FCS files.</description>
+  <stdio>
+    <exit_code range="2" level="fatal" description="Please provide a comma separated list of integers for columns you want to keep." />
+    <exit_code range="3" level="fatal" description="Please provide integers for columns you want to keep." />
+    <exit_code range="4" level="fatal" description="List of column headings and list of selected columns must match. For instance for columns 1,3,4: Marker1,,Marker3." />
+  </stdio>
+  <command><![CDATA[
+    python $__tool_directory__/editColumnHeadings.py -o "${output_file}" -i "${input}"
+ #if $columns
+    -c "${columns}"
+ #end if
+ #if $colnames
+    -n "${colnames}"
+ #end if
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowtext" name="input" type="data" label="Text file"/>
+    <param name="columns" type="text" label="Column order:" value="i.e.:1,5,2" optional="true" help="By default, will keep all columns in the same order."/>
+    <param name="colnames" type="text" label="New column headings:" value="i.e.:Marker1,,Marker4" optional="true" help="By default, will not change the column headings. Check below for more details.">
+    </param>
+  </inputs>
+  <outputs>
+    <data format="flowtext" name="output_file" label="Rearranged ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="input1.txt"/>
+      <param name="columns" value="3,4,5,6"/>
+      <param name="colnames" value="CD4,CCR3,CD8,CCR7"/>
+      <output name="output_file" file="output1.flowtext"/>
+    </test>
+    <test>
+      <param name="input" value="input2.txt"/>
+      <param name="columns" value="2,3,6,1"/>
+      <param name="colnames" value="i.e.:Marker1,,Marker4"/>
+      <output name="output_file" file="output2.flowtext"/>
+    </test>
+    <test>
+      <param name="input" value="input3.txt"/>
+      <param name="columns" value="i.e.:1,5,2"/>
+      <param name="colnames" value="M1,M2,M3,M4,M5,M6"/>
+      <output name="output_file" file="output3.flowtext"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+   This tool enables the removal, rearrangement and/or renaming of text file columns.
+
+-----
+
+**Input files**
+
+This tool requires txt, flowtext or tabular files as input.
+
+**Column order**
+
+Please indicate columns to keep in the order in which they should be (comma-separated list).
+This field is optional.
+
+**Column names**
+
+Please indicate the new column headings in the order in which they should appear in the output file (comma-separated list). The number of headings should match the number of columns in the output.
+This field is optional.
+
+.. class:: warningmark
+
+When providing column order AND new column headings the column count for each must match. See below for an example.
+
+**Output file**
+
+The output flowtext file is a copy of the input file with rearranged and/or renamed columns.
+
+-----
+
+**Examples**
+
+**Input file**::
+
+   Marker1 Marker2 Marker3 Marker4 Marker5
+   4       45      123     1956    62534
+   3       65      104     1254    36576
+   7       26      767     4124    42235
+   4       56      323     7623    74634
+   5       83      532     6256    34763
+   4       15      877     9312    21265
+
+*Example 1*
+
+- Column order: 5,3,2,4
+- Column names: Default
+
+*Output1*::
+
+   Marker5 Marker3 Marker2 Marker4
+   62534   123     45      1956
+   36576   104     65      1254
+   42235   767     26      4124
+   74634   323     56      7623
+   34763   532     83      6256
+   21265   877     15      9312
+
+*Example 2*
+
+- Column order: 5,3,2,4
+- Column names: Mar34,,Mar7,
+
+*Output2*::
+
+   Mar34 Marker3 Mar7 Marker4
+   62534 123     45   1956
+   36576 104     65   1254
+   42235 767     26   4124
+   74634 323     56   7623
+   34763 532     83   6256
+   21265 877     15   9312
+
+*Example 3*
+
+- Column order: Default
+- Column names: Mar23,,,Mar7,Mar8
+
+*Output3*::
+
+   Mar23 Marker2 Marker3 Mar7 Mar8
+   4     45      123     1956 62534
+   3     65      104     1254 36576
+   7     26      767     4124 42235
+   4     56      323     7623 74634
+   5     83      532     6256 34763
+   4     15      877     9312 21265
+  ]]>
+  </help>
+</tool>
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/input1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input1.txt Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4
+449 157 551 129 169 292
+894 1023 199 277 320 227
+262 73 437 69 0 146
+340 115 509 268 0 74
+316 76 50 0 60 129
+394 144 83 138 335 194
+383 139 499 0 0 224
+800 1023 239 284 288 280
+388 97 534 111 83 177
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/input2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input2.txt Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5
+363 76 550 200 0 127
+372 126 519 44 51 148
+1023 1023 289 401 362 254
+770 1023 175 361 225 237
+384 111 525 121 0 138
+602 578 385 286 222 131
+788 1023 216 310 270 294
+420 211 552 479 0 62
+668 1019 73 193 227 132
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/input3.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input3.txt Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA
+289 56 438 0 626 480
+352 153 30 147 483 386
+383 190 156 228 734 408
+261 62 432 121 598 555
+451 120 537 338 568 201
+373 104 3 110 621 584
+418 105 561 0 610 562
+358 185 0 292 641 327
+733 970 139 227 293 259
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/output1.flowtext
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output1.flowtext Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+CD4 CCR3 CD8 CCR7
+551 129 169 292
+199 277 320 227
+437 69 0 146
+509 268 0 74
+50 0 60 129
+83 138 335 194
+499 0 0 224
+239 284 288 280
+534 111 83 177
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/output2.flowtext
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output2.flowtext Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+Side Scatter FITC CD4 APC CCR5 Forward Scatter
+76 550 127 363
+126 519 148 372
+1023 289 254 1023
+1023 175 237 770
+111 525 138 384
+578 385 131 602
+1023 216 294 788
+211 552 62 420
+1019 73 132 668
b
diff -r 000000000000 -r d2cdffd27293 rearrange_columns/test-data/output3.flowtext
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output3.flowtext Mon Feb 27 13:05:18 2017 -0500
b
@@ -0,0 +1,10 @@
+M1 M2 M3 M4 M5 M6
+289 56 438 0 626 480
+352 153 30 147 483 386
+383 190 156 228 734 408
+261 62 432 121 598 555
+451 120 537 338 568 201
+373 104 3 110 621 584
+418 105 561 0 610 562
+358 185 0 292 641 327
+733 970 139 227 293 259