changeset 0:21b2dc3934ed draft

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 12:50:08 -0500
parents
children 4f28ee74079b
files extract_pop/extractpop.py extract_pop/extractpop.xml extract_pop/test-data/input.txt extract_pop/test-data/output.flowtext
diffstat 4 files changed, 190 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_pop/extractpop.py	Mon Feb 27 12:50:08 2017 -0500
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+
+from __future__ import print_function
+import sys
+import pandas as pd
+
+from argparse import ArgumentParser
+
+
+def is_int(s):  # True when int(s) succeeds, False when it raises ValueError
+    try:
+        int(s)
+        return True
+    except ValueError:  # only ValueError is handled; anything else propagates
+        return False
+
+
+def extract_pop(in_file, pop_list, out_file):
+    """Copy rows of in_file with Population in pop_list to out_file (TSV)."""
+    table = pd.read_table(in_file, dtype={'Population': object})
+    wanted = table['Population'].isin(pop_list)
+    table.loc[wanted].to_csv(out_file, sep="\t", index=False)
+
+
+def remove_pop(in_file, pop_list, out_file):
+    """Copy rows of in_file with Population not in pop_list to out_file (TSV)."""
+    table = pd.read_table(in_file, dtype={'Population': object})
+    unwanted = table['Population'].isin(pop_list)
+    table.loc[~unwanted].to_csv(out_file, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, validate the population
+    # list, then write the filtered table to the output file.
+    #
+    # Exit codes (mapped to messages by <stdio> in extractpop.xml):
+    #   2 - no population list given (blank or untouched placeholder)
+    #   3 - at least one entry is not an integer
+    #   0 - success
+    parser = ArgumentParser(
+        prog="ExtractPop",
+        description="Extract events associated to given population numbers.")
+
+    parser.add_argument(
+        '-i', dest="input_file", required=True,
+        help="File location for the text file.")
+
+    parser.add_argument(
+        '-p', dest="pops", required=True,
+        help="List of populations to extract.")
+
+    parser.add_argument(
+        '-o', dest="output_file", required=True,
+        help="Name of the output file.")
+
+    parser.add_argument(
+        '-m', dest="method", required=True,
+        help="What to do with the populations.")
+
+    args = parser.parse_args()
+
+    # Placeholder values that mean "nothing was entered". The first one
+    # must stay identical to the default of the "population" param in
+    # extractpop.xml.
+    default_values = ["i.e.:2,3,11,25", "default", "Default"]
+    # A blank -p value used to fall through with an empty list and exit 0
+    # with a header-only (or unfiltered) output; report it like a
+    # placeholder instead.
+    if not args.pops.strip() or args.pops in default_values:
+        sys.exit(2)
+    populations = [popn.strip() for popn in args.pops.split(",")]
+    # Reject anything int() cannot parse before touching the data.
+    if not all(is_int(popn) for popn in populations):
+        sys.exit(3)
+    # "selected" keeps the listed populations; any other value removes them.
+    if args.method == "selected":
+        extract_pop(args.input_file, populations, args.output_file)
+    else:
+        remove_pop(args.input_file, populations, args.output_file)
+    sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_pop/extractpop.xml	Mon Feb 27 12:50:08 2017 -0500
@@ -0,0 +1,90 @@
+<tool id="extract_pop" name="Extract populations" version="1.0">
+  <description>of interest from FLOCK or Cross Sample output.</description>
+  <requirements>
+    <requirement type="package" version="0.17.1">pandas</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="2" level="fatal" description="Please provide a comma separated list of populations to extract." />
+    <exit_code range="3" level="fatal" description="The populations to extract must be integers (e.g.: 1,2,4)." />
+  </stdio>
+  <command><![CDATA[
+    python $__tool_directory__/extractpop.py -i "${input}" -o "${output}" -p "${population}" -m "${method}"
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowclr" name="input" type="data" label="Source file"/>
+    <param name="population" type="text" label="Populations:" value="i.e.:2,3,11,25"/>
+  <param name="method" type="select" label="What would you like to do?">
+    <option value="selected">Keep only these populations.</option>
+    <option value="removed">Remove these populations.</option>
+  </param>
+  </inputs>
+  <outputs>
+    <data format="flowtext" name="output" label="Populations ${population} ${method} from ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="input.txt"/>
+      <param name="population" value="2,4"/>
+    <param name="method" value="selected"/>
+      <output name="output" file="output.flowtext"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+This tool extracts events from given populations from FLOCK or Cross Sample outputs.
+
+-----
+
+**Input**
+
+FLOCK or Cross Sample output - a table of the fluorescence intensities for each event and the population associated with each.
+
+**Output**
+
+The input file filtered for selected populations.
+
+-----
+
+**Example 1**
+
+*Input* - fluorescence intensities per marker and population ID per event::
+
+   Marker1 Marker2 Marker3 Population
+   34      45      12      1
+   33      65      10      5
+   19      62      98      2
+   12      36      58      1
+
+*Populations selected:* 2
+
+*Method:* Keep only the selected populations
+
+*Output* - fluorescence intensities per marker and population ID per event::
+
+   Marker1 Marker2 Marker3 Population
+   19      62      98      2
+
+**Example 2**
+
+*Input* - fluorescence intensities per marker and population ID per event::
+
+   Marker1 Marker2 Marker3 Population
+   34      45      12      1
+   33      65      10      5
+   19      62      98      2
+   12      36      58      1
+
+*Populations selected:* 2
+
+*Method:* Remove the selected populations
+
+*Output* - fluorescence intensities per marker and population ID per event::
+
+   Marker1 Marker2 Marker3 Population
+   34      45      12      1
+   33      65      10      5
+   12      36      58      1
+
+  ]]>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_pop/test-data/input.txt	Mon Feb 27 12:50:08 2017 -0500
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CCR3	PP CD8	APC CCR4	Population
+449	157	551	129	169	292	1
+894	1023	199	277	320	227	4
+262	73	437	69	0	146	1
+340	115	509	268	0	74	2
+316	76	50	0	60	129	5
+394	144	83	138	335	194	3
+383	139	499	0	0	224	6
+800	1023	239	284	288	280	2
+388	97	534	111	83	177	4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_pop/test-data/output.flowtext	Mon Feb 27 12:50:08 2017 -0500
@@ -0,0 +1,5 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CCR3	PP CD8	APC CCR4	Population
+894	1023	199	277	320	227	4
+340	115	509	268	0	74	2
+800	1023	239	284	288	280	2
+388	97	534	111	83	177	4