Mercurial > repos > nml > csvtk_split
changeset 0:92509cf500a4 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,337 @@ +<macros> + <token name="@VERSION@">0.20.0</token> + <token name="@GALAXY_VERSION@">galaxy0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">csvtk</requirement> + </requirements> + </xml> + <xml name="version_cmd"> + <version_command>csvtk version</version_command> + </xml> + <xml name="text_sanitizer"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </xml> + <xml name="multi_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="true" optional="false" + label="Specify TSV or CSV file inputs" + help="Input tsv or csv files to analyze. HOWEVER, they must all be the **same file type** or the tool will fail/not give correct results!" + /> + </xml> + <xml name="singular_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="false" optional="false" + label="Specify an input TSV or CSV file" + help="Input a TSV or CSV file to work on" + /> + </xml> + <xml name="ignore_case"> + <param type="boolean" name="ignore_case" + falsevalue="" truevalue="-i" + checked="false" + argument="-i" + label="Ignore cell case?" + help="ABC == abc" + /> + </xml> + <xml name="global_parameters"> + <section name="global_param" title="csvtk Global Parameters" expanded="false"> + <param type="boolean" name="header" + falsevalue="-H" truevalue="" + checked="true" + argument="-H" + label="Input file has a header line" + /> + <param type="boolean" name="illegal_rows" + falsevalue="" truevalue="-I" + checked="false" + argument="-I" + label="Ignore illegal rows" + help="Use if file has illegal rows as defined in the help section" + /> + <param type="boolean" name="empty_rows" + falsevalue="" truevalue="-E" + checked="false" + argument="-E" + label="Ignore empty rows" + help="Ignore rows with no data (only needed if input has empty rows)" + /> + <param type="boolean" name="lazy_quotes" + falsevalue="" truevalue="-l" + checked="false" + argument="-l" + label="File has Lazy quotes" + help="(TSV files only) If Yes, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field" + /> + </section> + </xml> + <xml name="fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column(s) based on" argument="-F -f"> + <option value="string">Column Name(s)</option> + <option value="column">Column Number(s)</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="true" + label="Input column number(s)" + help="Select column(s) to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name(s)" + help="Multiple columns can be given if separated by a ' , '. + Column numbers can be given too - ex. '1,2' will target columns 1 and 2. + Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="singular_fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input column number" + help="Select column to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name" + help="Input column name or number ex. 'Length' or '1'. + Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="plot_field"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input data column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input data column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input data column name" + help="Can use column name or column number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="groups_input"> + <conditional name="group_field" > + <param type="select" name="select_group" label="Select column to group data" argument="-g" optional="false" help="Specify a single column that is used to create data groups. An example is shown in the help section"> + <option value="none">None</option> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="none" /> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Group by column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Group by column name" + help="Can use column name or number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="global_plot_parameters"> + <section name="plot_parameters" title="Advanced Optional Plot Parameters" expanded="false"> + <param type="float" name="figure_height" + argument="--height" + optional="true" + label="Figure Height (Default 4.5)" + /> + <param type="float" name="figure_width" + argument="--width" + optional="true" + label="Figure Width (Default 1.5)" + /> + <param type="float" name="tick_width" + argument="--tick-width" + optional="true" + label="Axis Tick Width (Default 1.5)" + /> + <param type="text" name="title" + argument="--title" + optional="true" + label="Specify Figure Title"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="text" name="x_label" + argument="--xlab" + optional="true" + label="Specify X-axis label"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_x" + argument="--x-min" + optional="true" + label="Minimum value of X-axis (float)" + /> + <param type="float" name="max_x" + argument="--x-max" + optional="true" + label="Maximum value of X-axis (float)" + /> + <param type="text" name="y_label" + argument="--ylab" + optional="true" + label="Specify Y-axis label"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_y" + argument="--y-min" + optional="true" + label="Minimum value of Y-axis (float)" + /> + <param type="float" name="max_y" + argument="--y-max" + optional="true" + label="Maximum value of Y-axis (float)" + /> + </section> + </xml> + <token name="@HELP_COLUMNS@"> +Column Name Input Help +###################### + +- Multiple names can be given if separated by a ' , '. + + - ex. 'ID,Organism' would target the columns named ID and Organism for the function + +- Column names are case SeNsitive + +- Column numbers can also be given: + + -ex. '1,2,3' or '1-3' for inputting columns 1-3. + +- You can also specify all but unwanted column(s) with a ' - '. + + - ex. '-ID' would target all columns but the ID column + +---- + </token> + <token name="@HELP_INPUT_DATA@"> +Input Data +########## + +:: + + **Limitations of Input Data** + + 1. The CSV parser requires all the lines have same number of fields/columns. + If your file has illegal rows, set the "Illegal Rows" parameter to "Yes" to pass your data through + Even lines with spaces will cause error. + Example bad table below. + + 2. By default, csvtk thinks files have header rows. If your file does not, set global parameter + "Has Header Row" to "No" + + 3. Column names should be unique and are case sensitive! + + 4. Lines starting with "#" or "$" will be ignored, if in the header row + + 5. If " exists in tab-delimited files, set Lazy quotes global parameter to "Yes" + +Example bad table: + ++--------+--------+--------+--------+ +| Head 1 | Head 2 | Head 3 | Head 3 | ++========+========+========+========+ +| 1 | 2 | 3 | | ++--------+--------+--------+--------+ +| this | will | | break | ++--------+--------+--------+--------+ + +Bad tables may work if both the "Ignore Illegal Rows" and "Ignore Empty Rows" global parameters are set to "Yes", +But there is no guarentee of that! + +---- + </token> + <token name="@HELP_END_STATEMENT@"> +More Information +################ +For information from the creators of csvtk, please visit their site at: https://bioinf.shenwei.me/csvtk/ + +Although be aware that some features may not be available and some small changes were made to work with Galaxy. + +**Notable changes from their documentation:** + +- Cannot specify multiple file header names (IE cannot use "name;username" as a valid column match) + +- No single quotes / apostrophes allowed in text inputs + </token> + <xml name="citations"> + <citations> + <citation type="bibtex">@ARTICLE{a1, + title = {csvtk - CSV/TSV Toolkit}, + author = {Wei Shen}, + url = {https://github.com/shenwei356/csvtk} + } + }</citation> + </citations> + </xml> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split.xml Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,173 @@ +<tool id="csvtk_split" name="csvtk-split" version="@VERSION@+@GALAXY_VERSION@"> + <description>table into multiple files</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_cmd" /> + <command detect_errors="exit_code"><![CDATA[ + +## Create directory and set input name ## +######################################### +mkdir -p ./output && + +#set input_name = str($in_1.name) +ln -s '$in_1' $input_name && + +################### +## Start Command ## +################### + +csvtk split -o ./output --num-cpus "\${GALAXY_SLOTS:-1}" + + ## Add additional flags as specified ## + ####################################### + $ignore_case + $global_param.illegal_rows + $global_param.empty_rows + $global_param.header + $global_param.lazy_quotes + + ## Set Tabular input/output flag if input is tabular ## + ####################################################### + #if $in_1.is_of_type("tabular"): + -t -T + #end if + + ## Set Input ## + ############### + '$input_name' + + ## Specify fields for splitting ## + ################################## + -F -f '$column_text.in_text' + + ]]></command> + <inputs> + <expand macro="singular_input" /> + <expand macro="fields_input" /> + <expand macro="ignore_case" /> + <expand macro="global_parameters" /> + </inputs> + <outputs> + <collection name="list_split" type="list" label="${in_1.name} split on column ${column_text.in_text}"> + <discover_datasets pattern="__designation_and_ext__" directory="output" visible="false" format="csv,tabular"/> + </collection> + </outputs> + <tests> + <test> + <param name="in_1" value="csv.csv" /> + <conditional name="column_text"> + <param name="select" value="string" /> + <param name="in_text" value="1" /> + </conditional> + <param name="ignore_case" value="true" /> + <output_collection name="list_split" type="list"> + <element name="csv-bob" file="csv-bob.csv" ftype="csv" /> + <element name="csv-darian" file="csv-darian.csv" ftype="csv" /> + <element name="csv-jack" file="csv-jack.csv" ftype="csv" /> + </output_collection> + </test> + </tests> + <help><![CDATA[ + +Csvtk - Split Help +------------------ + +Info +#### +Split input csv/tsv files into multiple csv/tsv files based on a column number(s) or name(s). + +This tool will generate as many output files as there are unique data points in the selected column(s). + +Examples below. + +.. class:: warningmark + + Single quotes are not allowed in text inputs! + +@HELP_INPUT_DATA@ + + +Usage +##### +Here we will demonstrate what the tool does when 1 column is selected or when 2+ columns are selected. + +**Ex. Split input based on column 1:** + +:: + + Suppose we have the following input table that we want to split on column 2, the 'Colour' column: + + +--------+--------+----------+ + | Name | Colour | Food | + +========+========+==========+ + | Eric | Blue | Apples | + +--------+--------+----------+ + | Darian | Blue | Pancakes | + +--------+--------+----------+ + | Daniel | Red | Apples | + +--------+--------+----------+ + | Frank | Blue | Apples | + +--------+--------+----------+ + + -------------------------------------------------------------------------------- + + Splitting would yield the following 2 tables: + + +--------+--------+----------+ + | Name | Colour | Food | + +========+========+==========+ +--------+--------+----------+ + | Eric | Blue | Apples | | Name | Colour | Food | + +--------+--------+----------+ + +========+========+==========+ + | Darian | Blue | Pancakes | | Daniel | Red | Apples | + +--------+--------+----------+ +--------+--------+----------+ + | Frank | Blue | Apples | + +--------+--------+----------+ + + --------------------------------------------------------------------------------- + +---- + +**Ex. Split on 2 columns:** + +:: + + Suppose keep the same input but instead split on both the 'Colour' and 'Food' columns: + + +--------+--------+----------+ + | Name | Colour | Food | + +========+========+==========+ + | Eric | Blue | Apples | + +--------+--------+----------+ + | Darian | Blue | Pancakes | + +--------+--------+----------+ + | Daniel | Red | Apples | + +--------+--------+----------+ + | Frank | Blue | Apples | + +--------+--------+----------+ + + -------------------------------------------------------------------------------------------------------- + + This split would yield the following three output files: + + +--------+--------+----------+ + | Name | Colour | Food | +--------+--------+----------+ +--------+--------+----------+ + +========+========+==========+ | Name | Colour | Food | | Name | Colour | Food | + | Eric | Blue | Apples | + +========+========+==========+ + +========+========+==========+ + +--------+--------+----------+ | Darian | Blue | Pancakes | | Daniel | Red | Apples | + | Frank | Blue | Apples | +--------+--------+----------+ +--------+--------+----------+ + +--------+--------+----------+ + + Here, the value in the cell has to match for both columns for them to be output together. Frank and Eric + like apples and the colour blue so they get put together. And even though Darian likes the colour blue, he + prefers pancakes and thus gets his own output file. + +---- + +@HELP_END_STATEMENT@ + + + ]]></help> + <expand macro="citations" /> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Animals_More.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Spots,Dog,1 +Fred,Dog,5 +Mittens,Cat,16 +Slippers,Cat,11 +Gravy,Cat,6 +Stripes,Zebra,7 +Muffin,Cat,7 +Earl,Dog,2 +Sir-Wags-A-Lot,Dog,44
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,12 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,12 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,13 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +, +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.tabular Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,13 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 + +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/another.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Name,Food,Age +Joe,Beets,33 +Steven,Eggplant,36 +Jacob,Kale,59 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_input.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution +ECo-1,2523,1000 +LPn-1,100,1000000 +LPn-2,4,1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_output.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution,CFU/ml +ECo-1,2523,1000,2523000 +LPn-1,100,1000000,100000000 +LPn-2,4,1000,4000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blood_type.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Jacob AB Blue 160cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Patrick O Green 1cm +Chester O Blue 22cm +Kim B Teal 11cm +Sarah A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapsed.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name +AB- Fred +AB Darian; Jacob +O- Tim +O Adrian; Matthew; Patrick; Chester +B Kim +A Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,-,- +Adrian,-,- +Steven,-,- +Joe,-,Beets +Steven,-,Eggplant +Jacob,-,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_2.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,, +Adrian,, +Steven,, +Joe,,Beets +Steven,,Eggplant +Jacob,,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,1 @@ +X,Y,0.9960
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_2.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,1 @@ +X Y 0.9997
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-bob.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Bob,Protein,All of them,250cm,O-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-darian.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-jack.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O +Bob,Protein,All of them,250cm,O- +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,33 @@ +Length,GC Content +100,50.00 +100,50.05 +100,49.95 +110,50.60 +105,50.50 +101,49.05 +99,49.95 +95,50.95 +100,50.00 +100,50.00 +90,66.00 +100,66.60 +100,65.05 +101,65.95 +101,65.55 +99,66.00 +95,66.05 +100,66.55 +105,65.55 +100,65.55 +110,66.55 +110,70.00 +100,70.00 +90,45.65 +99,45.60 +99,45.50 +95,45.20 +95,45.55 +100,45.55 +100,45.20 +100,45.55 +100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_2.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,33 @@ +Group,GC Content +A,50.00 +A,50.05 +A,49.95 +A,50.60 +A,50.50 +A,49.05 +A,49.95 +A,50.95 +A,50.00 +A,50.00 +B,66.00 +B,66.60 +B,65.05 +B,65.95 +B,65.55 +B,66.00 +B,66.05 +B,66.55 +B,65.55 +B,65.55 +B,66.55 +C,70.00 +C,70.00 +D,45.65 +D,45.60 +D,45.50 +D,45.20 +D,45.55 +D,45.55 +D,45.20 +D,45.55 +D,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Person,Height,Sport,Job +Fred,140cm,Diving,Accountant +Darian,175cm,Running,Student +Jake,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Blood_Type Favourite_Colour frequency +AB Blue 2 +O Green 2 +O Blue 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/frequency.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,8 @@ +Blood_Type Favourite_Colour frequency +AB- Orange 1 +AB Blue 2 +O- Green 1 +O Green 2 +O Blue 2 +B Teal 1 +A Blue 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gathered.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Favourite Food,Favourite Colour,Height,BloodType,1,2 +Pineapple,Off White,165cm,O,Person,Jack +Protein,All of them,250cm,O-,Person,Bob +Potatos,Blue,175cm,O,Person,Darian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,6 @@ +Test,A,B,C +D,,S,C +F,F,F,F +F,F,F, +TT,TT,TT,TT +Agh,Ol,As,TT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal_collapse.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,5 @@ +C,Test +C,D +F,F +,F +TT,TT; Agh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Darian,Potatos,Blue,175cm,O,175cm,Running,Student
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined_filled.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,6 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Jack,Pineapple,Off White,165cm,O,a,a,a +Bob,Protein,All of them,250cm,O-,a,a,a +Darian,Potatos,Blue,175cm,O,175cm,Running,Student +Fred,a,a,a,a,140cm,Diving,Accountant +Jake,a,a,a,a,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kv.txt Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,3 @@ +Key Value +Dog Big +Cat Small \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutate_removed.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,11 @@ +Blood_Type Favourite_Colour Height new_column +AB Blue 175cm Darian +AB- Orange 185cm +AB Blue 160cm +O Blue 2000cm +O- Green 20cm +O Green 140cm +O Green 1cm +O Blue 22cm +B Teal 11cm +A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutated.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height new_column +Darian AB Blue 175cm Darian +Fred AB- Orange 185cm Fred +Jacob AB Blue 160cm Jacob +Adrian O Blue 2000cm Adrian +Tim O- Green 20cm Tim +Matthew O Green 140cm Matthew +Patrick O Green 1cm Patrick +Chester O Blue 22cm Chester +Kim B Teal 11cm Kim +Sarah A Blue 123cm Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/other.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Name,Sport,Job +Fred,Volleyball,Molecular Biologist +Adrian,Basketball,Computational Biologist +Steven,Football,Microbiologist \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/plot.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,33 @@ +Group,Length,GC Content +A,100,50.00 +A,100,50.05 +A,100,49.95 +A,110,50.60 +A,105,50.50 +A,101,49.05 +A,99,49.95 +A,95,50.95 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,100,65.05 +B,101,65.95 +B,101,65.55 +B,99,66.00 +B,95,66.05 +B,100,66.55 +B,105,65.55 +B,100,65.55 +B,110,66.55 +C,110,70.00 +C,100,70.00 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.55 +D,100,45.20 +D,100,45.55 +D,100,45.50 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,1-Dog +Mittens,2-Cat +Fuzzy,3-Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_2.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Big +Mittens,Small +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_input.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Dog +Mittens,Cat +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,16 @@ +Group,Length,GC Content +A,100,50.00 +A,101,49.05 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,101,65.55 +B,99,66.00 +B,105,65.55 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.20
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_2.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,25 @@ +n,Group,Length,GC Content +1,A,100,50.00 +4,A,110,50.60 +5,A,105,50.50 +6,A,101,49.05 +9,A,100,50.00 +10,A,100,50.00 +11,B,90,66.00 +12,B,100,66.60 +13,B,100,65.05 +14,B,101,65.95 +15,B,101,65.55 +16,B,99,66.00 +19,B,105,65.55 +20,B,100,65.55 +22,C,110,70.00 +24,D,90,45.65 +25,D,99,45.60 +26,D,99,45.50 +27,D,95,45.20 +28,D,95,45.55 +29,D,100,45.55 +30,D,100,45.20 +31,D,100,45.55 +32,D,100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_1.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type 1 2 +AB- Fred NA +AB Darian Jacob +O- Tim NA +O Adrian Matthew +B Kim NA +A Sarah NA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_2.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name 1 2 +AB- Fred Fred N/A +AB Darian; Jacob Darian Jacob +O- Tim Tim N/A +O Adrian; Matthew; Patrick; Chester Adrian Matthew; Patrick; Chester +B Kim Kim N/A +A Sarah Sarah N/A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_order.txt Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,3 @@ +Zebra +Cat +Dog
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_1.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Stripes,Zebra,7 +Mittens,Cat,16 +Slippers,Cat,11 +Muffin,Cat,7 +Gravy,Cat,6 +Sir-Wags-A-Lot,Dog,44 +Fred,Dog,5 +Earl,Dog,2 +Spots,Dog,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,5 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/uniq.tsv Tue May 19 17:18:02 2020 -0400 @@ -0,0 +1,8 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Kim B Teal 11cm +Sarah A Blue 123cm