nml / csvtk_summary
changeset 0:ceb70f0dd898 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,337 @@ +<macros> + <token name="@VERSION@">0.20.0</token> + <token name="@GALAXY_VERSION@">galaxy0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">csvtk</requirement> + </requirements> + </xml> + <xml name="version_cmd"> + <version_command>csvtk version</version_command> + </xml> + <xml name="text_sanitizer"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </xml> + <xml name="multi_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="true" optional="false" + label="Specify TSV or CSV file inputs" + help="Input tsv or csv files to analyze. HOWEVER, they must all be the **same file type** or the tool will fail/not give correct results!" + /> + </xml> + <xml name="singular_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="false" optional="false" + label="Specify an input TSV or CSV file" + help="Input a TSV or CSV file to work on" + /> + </xml> + <xml name="ignore_case"> + <param type="boolean" name="ignore_case" + falsevalue="" truevalue="-i" + checked="false" + argument="-i" + label="Ignore cell case?" + help="ABC == abc" + /> + </xml> + <xml name="global_parameters"> + <section name="global_param" title="csvtk Global Parameters" expanded="false"> + <param type="boolean" name="header" + falsevalue="-H" truevalue="" + checked="true" + argument="-H" + label="Input file has a header line" + /> + <param type="boolean" name="illegal_rows" + falsevalue="" truevalue="-I" + checked="false" + argument="-I" + label="Ignore illegal rows" + help="Use if file has illegal rows as defined in the help section" + /> + <param type="boolean" name="empty_rows" + falsevalue="" truevalue="-E" + checked="false" + argument="-E" + label="Ignore empty rows" + help="Ignore rows with no data (only needed if input has empty rows)" + /> + <param type="boolean" name="lazy_quotes" + falsevalue="" truevalue="-l" + checked="false" + argument="-l" + label="File has Lazy quotes" + help="(TSV files only) If Yes, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field" + /> + </section> + </xml> + <xml name="fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column(s) based on" argument="-F -f"> + <option value="string">Column Name(s)</option> + <option value="column">Column Number(s)</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="true" + label="Input column number(s)" + help="Select column(s) to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name(s)" + help="Multiple columns can be given if separated by a ' , '. + Column numbers can be given too - ex. '1,2' will target columns 1 and 2. 
+ Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="singular_fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input column number" + help="Select column to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name" + help="Input column name or number ex. 'Length' or '1'. + Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="plot_field"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input data column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input data column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input data column name" + help="Can use column name or column number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="groups_input"> + <conditional name="group_field" > + <param type="select" name="select_group" label="Select column to group data" argument="-g" optional="false" help="Specify a single column that is used to create data groups. 
An example is shown in the help section"> + <option value="none">None</option> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="none" /> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Group by column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Group by column name" + help="Can use column name or number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="global_plot_parameters"> + <section name="plot_parameters" title="Advanced Optional Plot Parameters" expanded="false"> + <param type="float" name="figure_height" + argument="--height" + optional="true" + label="Figure Height (Default 4.5)" + /> + <param type="float" name="figure_width" + argument="--width" + optional="true" + label="Figure Width (Default 1.5)" + /> + <param type="float" name="tick_width" + argument="--tick-width" + optional="true" + label="Axis Tick Width (Default 1.5)" + /> + <param type="text" name="title" + argument="--title" + optional="true" + label="Specify Figure Title"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="text" name="x_label" + argument="--xlab" + optional="true" + label="Specify X-axis label"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_x" + argument="--x-min" + optional="true" + label="Minimum value of X-axis (float)" + /> + <param type="float" name="max_x" + argument="--x-max" + optional="true" + label="Maximum value of X-axis (float)" + /> + <param type="text" name="y_label" + argument="--ylab" + optional="true" + label="Specify Y-axis label"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_y" + argument="--y-min" + optional="true" + label="Minimum value of Y-axis (float)" + /> + <param type="float" name="max_y" + argument="--y-max" + optional="true" + label="Maximum value of Y-axis (float)" + /> + </section> + </xml> + <token name="@HELP_COLUMNS@"> +Column Name Input Help +###################### + +- Multiple names can be given if separated by a ' , '. + + - ex. 'ID,Organism' would target the columns named ID and Organism for the function + +- Column names are case sensitive + +- Column numbers can also be given: + + - ex. '1,2,3' or '1-3' for inputting columns 1-3. + +- You can also specify all but unwanted column(s) with a ' - '. + + - ex. '-ID' would target all columns but the ID column + +---- + </token> + <token name="@HELP_INPUT_DATA@"> +Input Data +########## + +:: + + **Limitations of Input Data** + + 1. The CSV parser requires that all lines have the same number of fields/columns. + If your file has illegal rows, set the "Ignore illegal rows" parameter to "Yes" to pass your data through. + Even lines containing only spaces will cause an error. + An example of a bad table is shown below. + + 2. By default, csvtk assumes files have a header row. If your file does not, set the global parameter + "Input file has a header line" to "No" + + 3. Column names should be unique and are case sensitive! + + 4. Lines starting with "#" or "$" will be ignored if they are in the header row + + 5. 
If " exists in tab-delimited files, set Lazy quotes global parameter to "Yes" + +Example bad table: + ++--------+--------+--------+--------+ +| Head 1 | Head 2 | Head 3 | Head 3 | ++========+========+========+========+ +| 1 | 2 | 3 | | ++--------+--------+--------+--------+ +| this | will | | break | ++--------+--------+--------+--------+ + +Bad tables may work if both the "Ignore Illegal Rows" and "Ignore Empty Rows" global parameters are set to "Yes", +But there is no guarentee of that! + +---- + </token> + <token name="@HELP_END_STATEMENT@"> +More Information +################ +For information from the creators of csvtk, please visit their site at: https://bioinf.shenwei.me/csvtk/ + +Although be aware that some features may not be available and some small changes were made to work with Galaxy. + +**Notable changes from their documentation:** + +- Cannot specify multiple file header names (IE cannot use "name;username" as a valid column match) + +- No single quotes / apostrophes allowed in text inputs + </token> + <xml name="citations"> + <citations> + <citation type="bibtex">@ARTICLE{a1, + title = {csvtk - CSV/TSV Toolkit}, + author = {Wei Shen}, + url = {https://github.com/shenwei356/csvtk} + } + }</citation> + </citations> + </xml> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summary.xml Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,396 @@ +<tool id="csvtk_summary" name="csvtk-summary" version="@VERSION@+@GALAXY_VERSION@"> + <description> statistics of selected fields</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_cmd" /> + <command detect_errors="exit_code"><![CDATA[ + +## Set Up Input ## +################## + +#set input_list = list() +#for $repeat in $field + #silent $input_list.append(str($repeat.column_text.in_text) + ":" + str($repeat.analysis_type)) +#end for + +#set input_total = ",".join($input_list) + +################### +## Start Command ## +################### + +csvtk summary --num-cpus "\${GALAXY_SLOTS:-1}" + + ## Add additional flags as specified ## + ####################################### + $global_param.illegal_rows + $global_param.empty_rows + $global_param.header + $global_param.lazy_quotes + + ## Set Tabular input/output flag if input is tabular ## + ####################################################### + #if $in_1.is_of_type("tabular"): + -t -T + #end if + + ## Set Input ## + ############### + '$in_1' + + ## Specify fields ## + #################### + -f '$input_total' + + ## other ## + ########### + + #if $group_field.select_group != "none" + -g '$group_field.in_text' + #end if + + -s '$extra.separator' + -S '$extra.rand_int' + -n '$decimal_width' + $ignore_non_digits + + + ## To output ## + ############### + > summary + + ]]></command> + <inputs> + <expand macro="singular_input" /> + <repeat name="field" title="Select Column and Operator" min="1"> + <expand macro="singular_fields_input" /> + <param name="analysis_type" type="select" label="Analysis Type" help="Select analysis type to do on the chosen field"> + <option value="collapse">Collapse</option> + <option value="count">Count</option> + <option value="countn">Count of Digits (countn)</option> + <option value="countunique">Count Unique</option> + <option value="first">First Value</option> + <option value="last">Last Value</option> + <option value="max">Maximum</option> + <option value="mean">Mean</option> + <option value="median">Median</option> + <option value="min">Minimum</option> + <option value="prod">Product of the Elements</option> + <option value="q1">q1</option> + <option value="q2">q2</option> + <option value="q3">q3</option> + <option value="rand">Random Value</option> + <option value="entropy">Shannon Entropy</option> + <option value="stdev">Standard Deviation</option> + <option value="sum">Sum</option> + <option value="uniq">Unique</option> + <option value="variance">Variance</option> + </param> + </repeat> + <expand macro="groups_input" /> + <param name="decimal_width" type="integer" value="2" argument="-n" + label="Number of Decimals" + help="Limit float to N decimal places" + /> + <param name="ignore_non_digits" type="boolean" checked="false" argument="-i" + truevalue="-i" + falsevalue="" + label="Ignore non-digits" + help="Ignore non-digit values in columns. Ex. NA or N/A" + /> + <section name="extra" title="Specific Optional Analysis Modifiers" expanded="false"> + <param name="separator" type="text" value="; " argument="-s" + label="Collapse Separator String" + help="Input string of characters that will separate collapsed columns. 
The ' character is not allowed"> + <expand macro="text_sanitizer" /> + </param> + <param name="rand_int" type="integer" value="11" argument="-S" + label="Random Value Seed" + help="specify an integer" + /> + </section> + <expand macro="global_parameters" /> + </inputs> + <outputs> + <data format_source="in_1" name="summary" from_work_dir="summary" label="${in_1.name} summary of analyses" /> + </outputs> + <tests> + <test> + <param name="in_1" value="plot.csv" /> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="collapse" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="count" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="countn" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="countunique" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="entropy" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="first" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="last" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="max" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="mean" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="median" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="min" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="prod" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="q1" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="q2" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + 
<param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="q3" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="rand" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="stdev" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="sum" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="2" /> + </conditional> + <param name="analysis_type" value="uniq" /> + </repeat> + <repeat name="field"> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="3" /> + </conditional> + <param name="analysis_type" value="variance" /> + </repeat> + <conditional name="group_field"> + <param name="select_group" value="string" /> + <param name="in_text" value="1" /> + </conditional> + <output name="summary" > + <assert_contents> + <has_text text="collapse" /> + <has_text text="count" /> + <has_text text="countn" /> + <has_text text="countunique" /> + <has_text text="entropy" /> + <has_text text="first" /> + <has_text text="last" /> + <has_text text="max" /> + <has_text text="mean" /> + <has_text text="median" /> + <has_text text="min" /> + <has_text text="prod" /> + <has_text text="q1" /> + <has_text text="q2" /> + <has_text text="q3" /> + <has_text text="rand" /> + <has_text text="stdev" /> + <has_text text="sum" /> + <has_text text="uniq" /> + <has_text text="variance" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + +Csvtk - Summary Help +-------------------- + +Info +#### + +Csvtk Summary works to allow the use of a variety of analysis tools on the selected columns(s) and display one output at the end + +.. class:: warningmark + + Single quotes are not allowed in text inputs! + +@HELP_INPUT_DATA@ + + +Usage +##### + +To run csvtk-summary, all you need is a valid (as defined above) CSV or TSV file with any column(s) that you want to +run one of the analyses on. + +Analyses include: + +- Collapse + +- Count + +- Count Numbers (countn) + +- Count Unique + +- First Value Selection + +- Last Value Selection + +- Maximum + +- Mean + +- Median + +- Minimum + +- q1 + +- q2 + +- q3 + +- Random Value Selection + +- Shannon Entropy + +- Sum + +- Unique Values + +- Variance + +More information on these can be found on the `csvtk website. <https://bioinf.shenwei.me/csvtk/usage/#summary>`_ + +**Example Summary Input** + +Input table: + ++-------+--------+ +| Group | Length | ++=======+========+ +| A | 1500 | ++-------+--------+ +| B | 1000 | ++-------+--------+ +| B | 1500 | ++-------+--------+ +| B | 2000 | ++-------+--------+ + +Suppose you wanted to group the values based on column 1 of the input table and then find out the mean lenght and maximum length for each group. +You would input this into csvtk-summary by creating 2 input repeats where the first one selects "column 2" and an analysis of "mean" and the +second one selects "column 2" with an analysis of "maximum". 
+ +Running this would generate the following output: + ++-------+-------------+------------+ +| Group | Length:mean | Length:max | ++=======+=============+============+ +| A | 1500 | 1500 | ++-------+-------------+------------+ +| B | 1500 | 2000 | ++-------+-------------+------------+ + +-------- + + +@HELP_COLUMNS@ + + +@HELP_END_STATEMENT@ + + + ]]></help> + <expand macro="citations" /> +</tool> \ No newline at end of file
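The command block in summary.xml joins every "Select Column and Operator" repeat into a column:operation list for -f, appends -g when a grouping column is chosen, and redirects the result to a file named summary. As a rough sketch (assuming a comma-separated input with a header row and the default optional values; input.csv stands in for the Galaxy-supplied dataset path), the wrapper would assemble approximately this command for the Group/Length example in the help section:

    csvtk summary --num-cpus "${GALAXY_SLOTS:-1}" 'input.csv' -f 'Length:mean,Length:max' -g 'Group' -s '; ' -S '11' -n '2' > summary

For a tabular (TSV) dataset the wrapper also inserts -t -T so csvtk reads and writes tab-delimited data, and the optional global flags (-H, -I, -E, -l) and -i are added only when the corresponding parameters are switched on.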
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Animals_More.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Spots,Dog,1 +Fred,Dog,5 +Mittens,Cat,16 +Slippers,Cat,11 +Gravy,Cat,6 +Stripes,Zebra,7 +Muffin,Cat,7 +Earl,Dog,2 +Sir-Wags-A-Lot,Dog,44
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,12 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,12 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,13 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +, +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.tabular Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,13 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 + +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/another.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Name,Food,Age +Joe,Beets,33 +Steven,Eggplant,36 +Jacob,Kale,59 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_input.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution +ECo-1,2523,1000 +LPn-1,100,1000000 +LPn-2,4,1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_output.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution,CFU/ml +ECo-1,2523,1000,2523000 +LPn-1,100,1000000,100000000 +LPn-2,4,1000,4000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blood_type.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Jacob AB Blue 160cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Patrick O Green 1cm +Chester O Blue 22cm +Kim B Teal 11cm +Sarah A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapsed.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name +AB- Fred +AB Darian; Jacob +O- Tim +O Adrian; Matthew; Patrick; Chester +B Kim +A Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,-,- +Adrian,-,- +Steven,-,- +Joe,-,Beets +Steven,-,Eggplant +Jacob,-,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_2.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,, +Adrian,, +Steven,, +Joe,,Beets +Steven,,Eggplant +Jacob,,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,1 @@ +X,Y,0.9960
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_2.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,1 @@ +X Y 0.9997
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-bob.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Bob,Protein,All of them,250cm,O-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-darian.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-jack.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O +Bob,Protein,All of them,250cm,O- +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,33 @@ +Length,GC Content +100,50.00 +100,50.05 +100,49.95 +110,50.60 +105,50.50 +101,49.05 +99,49.95 +95,50.95 +100,50.00 +100,50.00 +90,66.00 +100,66.60 +100,65.05 +101,65.95 +101,65.55 +99,66.00 +95,66.05 +100,66.55 +105,65.55 +100,65.55 +110,66.55 +110,70.00 +100,70.00 +90,45.65 +99,45.60 +99,45.50 +95,45.20 +95,45.55 +100,45.55 +100,45.20 +100,45.55 +100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_2.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,33 @@ +Group,GC Content +A,50.00 +A,50.05 +A,49.95 +A,50.60 +A,50.50 +A,49.05 +A,49.95 +A,50.95 +A,50.00 +A,50.00 +B,66.00 +B,66.60 +B,65.05 +B,65.95 +B,65.55 +B,66.00 +B,66.05 +B,66.55 +B,65.55 +B,65.55 +B,66.55 +C,70.00 +C,70.00 +D,45.65 +D,45.60 +D,45.50 +D,45.20 +D,45.55 +D,45.55 +D,45.20 +D,45.55 +D,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Person,Height,Sport,Job +Fred,140cm,Diving,Accountant +Darian,175cm,Running,Student +Jake,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Blood_Type Favourite_Colour frequency +AB Blue 2 +O Green 2 +O Blue 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/frequency.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,8 @@ +Blood_Type Favourite_Colour frequency +AB- Orange 1 +AB Blue 2 +O- Green 1 +O Green 2 +O Blue 2 +B Teal 1 +A Blue 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gathered.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Favourite Food,Favourite Colour,Height,BloodType,1,2 +Pineapple,Off White,165cm,O,Person,Jack +Protein,All of them,250cm,O-,Person,Bob +Potatos,Blue,175cm,O,Person,Darian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,6 @@ +Test,A,B,C +D,,S,C +F,F,F,F +F,F,F, +TT,TT,TT,TT +Agh,Ol,As,TT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal_collapse.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,5 @@ +C,Test +C,D +F,F +,F +TT,TT; Agh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Darian,Potatos,Blue,175cm,O,175cm,Running,Student
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined_filled.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,6 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Jack,Pineapple,Off White,165cm,O,a,a,a +Bob,Protein,All of them,250cm,O-,a,a,a +Darian,Potatos,Blue,175cm,O,175cm,Running,Student +Fred,a,a,a,a,140cm,Diving,Accountant +Jake,a,a,a,a,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kv.txt Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,3 @@ +Key Value +Dog Big +Cat Small \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutate_removed.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,11 @@ +Blood_Type Favourite_Colour Height new_column +AB Blue 175cm Darian +AB- Orange 185cm +AB Blue 160cm +O Blue 2000cm +O- Green 20cm +O Green 140cm +O Green 1cm +O Blue 22cm +B Teal 11cm +A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutated.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height new_column +Darian AB Blue 175cm Darian +Fred AB- Orange 185cm Fred +Jacob AB Blue 160cm Jacob +Adrian O Blue 2000cm Adrian +Tim O- Green 20cm Tim +Matthew O Green 140cm Matthew +Patrick O Green 1cm Patrick +Chester O Blue 22cm Chester +Kim B Teal 11cm Kim +Sarah A Blue 123cm Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/other.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Name,Sport,Job +Fred,Volleyball,Molecular Biologist +Adrian,Basketball,Computational Biologist +Steven,Football,Microbiologist \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/plot.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,33 @@ +Group,Length,GC Content +A,100,50.00 +A,100,50.05 +A,100,49.95 +A,110,50.60 +A,105,50.50 +A,101,49.05 +A,99,49.95 +A,95,50.95 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,100,65.05 +B,101,65.95 +B,101,65.55 +B,99,66.00 +B,95,66.05 +B,100,66.55 +B,105,65.55 +B,100,65.55 +B,110,66.55 +C,110,70.00 +C,100,70.00 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.55 +D,100,45.20 +D,100,45.55 +D,100,45.50 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,1-Dog +Mittens,2-Cat +Fuzzy,3-Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_2.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Big +Mittens,Small +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_input.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Dog +Mittens,Cat +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,16 @@ +Group,Length,GC Content +A,100,50.00 +A,101,49.05 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,101,65.55 +B,99,66.00 +B,105,65.55 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.20
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_2.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,25 @@ +n,Group,Length,GC Content +1,A,100,50.00 +4,A,110,50.60 +5,A,105,50.50 +6,A,101,49.05 +9,A,100,50.00 +10,A,100,50.00 +11,B,90,66.00 +12,B,100,66.60 +13,B,100,65.05 +14,B,101,65.95 +15,B,101,65.55 +16,B,99,66.00 +19,B,105,65.55 +20,B,100,65.55 +22,C,110,70.00 +24,D,90,45.65 +25,D,99,45.60 +26,D,99,45.50 +27,D,95,45.20 +28,D,95,45.55 +29,D,100,45.55 +30,D,100,45.20 +31,D,100,45.55 +32,D,100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_1.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type 1 2 +AB- Fred NA +AB Darian Jacob +O- Tim NA +O Adrian Matthew +B Kim NA +A Sarah NA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_2.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name 1 2 +AB- Fred Fred N/A +AB Darian; Jacob Darian Jacob +O- Tim Tim N/A +O Adrian; Matthew; Patrick; Chester Adrian Matthew; Patrick; Chester +B Kim Kim N/A +A Sarah Sarah N/A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_order.txt Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,3 @@ +Zebra +Cat +Dog
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_1.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Stripes,Zebra,7 +Mittens,Cat,16 +Slippers,Cat,11 +Muffin,Cat,7 +Gravy,Cat,6 +Sir-Wags-A-Lot,Dog,44 +Fred,Dog,5 +Earl,Dog,2 +Spots,Dog,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,5 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/uniq.tsv Tue May 19 17:23:57 2020 -0400 @@ -0,0 +1,8 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Kim B Teal 11cm +Sarah A Blue 123cm