Mercurial > repos > nml > csvtk_uniq
changeset 0:07d1c048ec1b draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,337 @@ +<macros> + <token name="@VERSION@">0.20.0</token> + <token name="@GALAXY_VERSION@">galaxy0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">csvtk</requirement> + </requirements> + </xml> + <xml name="version_cmd"> + <version_command>csvtk version</version_command> + </xml> + <xml name="text_sanitizer"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </xml> + <xml name="multi_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="true" optional="false" + label="Specify TSV or CSV file inputs" + help="Input tsv or csv files to analyze. HOWEVER, they must all be the **same file type** or the tool will fail/not give correct results!" + /> + </xml> + <xml name="singular_input"> + <param type="data" name="in_1" format="csv,tabular" + multiple="false" optional="false" + label="Specify an input TSV or CSV file" + help="Input a TSV or CSV file to work on" + /> + </xml> + <xml name="ignore_case"> + <param type="boolean" name="ignore_case" + falsevalue="" truevalue="-i" + checked="false" + argument="-i" + label="Ignore cell case?" 
+ help="ABC == abc" + /> + </xml> + <xml name="global_parameters"> + <section name="global_param" title="csvtk Global Parameters" expanded="false"> + <param type="boolean" name="header" + falsevalue="-H" truevalue="" + checked="true" + argument="-H" + label="Input file has a header line" + /> + <param type="boolean" name="illegal_rows" + falsevalue="" truevalue="-I" + checked="false" + argument="-I" + label="Ignore illegal rows" + help="Use if file has illegal rows as defined in the help section" + /> + <param type="boolean" name="empty_rows" + falsevalue="" truevalue="-E" + checked="false" + argument="-E" + label="Ignore empty rows" + help="Ignore rows with no data (only needed if input has empty rows)" + /> + <param type="boolean" name="lazy_quotes" + falsevalue="" truevalue="-l" + checked="false" + argument="-l" + label="File has Lazy quotes" + help="(TSV files only) If Yes, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field" + /> + </section> + </xml> + <xml name="fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column(s) based on" argument="-F -f"> + <option value="string">Column Name(s)</option> + <option value="column">Column Number(s)</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="true" + label="Input column number(s)" + help="Select column(s) to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name(s)" + help="Multiple columns can be given if separated by a ' , '. + Column numbers can be given too - ex. '1,2' will target columns 1 and 2. 
+ Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="singular_fields_input"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input column number" + help="Select column to use for analysis" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input column name" + help="Input column name or number ex. 'Length' or '1'. + Please see the help section below for more detailed info"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="plot_field"> + <conditional name="column_text" > + <param type="select" name="select" label="Select input data column based on" argument="-f"> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Input data column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Input data column name" + help="Can use column name or column number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="groups_input"> + <conditional name="group_field" > + <param type="select" name="select_group" label="Select column to group data" argument="-g" optional="false" help="Specify a single column that is used to create data groups. 
An example is shown in the help section"> + <option value="none">None</option> + <option value="string">Column Name</option> + <option value="column">Column Number</option> + </param> + <when value="none" /> + <when value="column"> + <param type="data_column" name="in_text" + data_ref="in_1" + multiple="false" + label="Group by column number" + /> + </when> + <when value="string"> + <param type="text" name="in_text" + optional="false" + label="Group by column name" + help="Can use column name or number"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </xml> + <xml name="global_plot_parameters"> + <section name="plot_parameters" title="Advanced Optional Plot Parameters" expanded="false"> + <param type="float" name="figure_height" + argument="--height" + optional="true" + label="Figure Height (Default 4.5)" + /> + <param type="float" name="figure_width" + argument="--width" + optional="true" + label="Figure Width (Default 1.5)" + /> + <param type="float" name="tick_width" + argument="--tick-width" + optional="true" + label="Axis Tick Width (Default 1.5)" + /> + <param type="text" name="title" + argument="--title" + optional="true" + label="Specify Figure Title"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="text" name="x_label" + argument="--xlab" + optional="true" + label="Specify X-axis label"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_x" + argument="--x-min" + optional="true" + label="Minimum value of X-axis (float)" + /> + <param type="float" name="max_x" + argument="--x-max" + optional="true" + label="Maximum value of X-axis (float)" + /> + <param type="text" name="y_label" + argument="--ylab" + optional="true" + label="Specify Y-axis label"> + <sanitizer> + <valid initial="string.printable"> + 
<remove value="'"/> + </valid> + </sanitizer> + </param> + <param type="float" name="min_y" + argument="--y-min" + optional="true" + label="Minimum value of Y-axis (float)" + /> + <param type="float" name="max_y" + argument="--y-max" + optional="true" + label="Maximum value of Y-axis (float)" + /> + </section> + </xml> + <token name="@HELP_COLUMNS@"> +Column Name Input Help +###################### + +- Multiple names can be given if separated by a ' , '. + + - ex. 'ID,Organism' would target the columns named ID and Organism for the function + +- Column names are case SeNsitive + +- Column numbers can also be given: + + - ex. '1,2,3' or '1-3' for inputting columns 1-3. + +- You can also specify all but unwanted column(s) with a ' - '. + + - ex. '-ID' would target all columns but the ID column + +---- + </token> + <token name="@HELP_INPUT_DATA@"> +Input Data +########## + +:: + + **Limitations of Input Data** + + 1. The CSV parser requires all the lines to have the same number of fields/columns. + If your file has illegal rows, set the "Illegal Rows" parameter to "Yes" to pass your data through + Even lines with spaces will cause error. + Example bad table below. + + 2. By default, csvtk thinks files have header rows. If your file does not, set global parameter + "Has Header Row" to "No" + + 3. Column names should be unique and are case sensitive! + + 4. Lines starting with "#" or "$" will be ignored, if in the header row + + 5. If " exists in tab-delimited files, set Lazy quotes global parameter to "Yes" + +Example bad table: + ++--------+--------+--------+--------+ +| Head 1 | Head 2 | Head 3 | Head 3 | ++========+========+========+========+ +| 1 | 2 | 3 | | ++--------+--------+--------+--------+ +| this | will | | break | ++--------+--------+--------+--------+ + +Bad tables may work if both the "Ignore Illegal Rows" and "Ignore Empty Rows" global parameters are set to "Yes", +but there is no guarantee of that! 
+ +---- + </token> + <token name="@HELP_END_STATEMENT@"> +More Information +################ +For information from the creators of csvtk, please visit their site at: https://bioinf.shenwei.me/csvtk/ + +Although be aware that some features may not be available and some small changes were made to work with Galaxy. + +**Notable changes from their documentation:** + +- Cannot specify multiple file header names (i.e. cannot use "name;username" as a valid column match) + +- No single quotes / apostrophes allowed in text inputs + </token> + <xml name="citations"> + <citations> + <citation type="bibtex">@ARTICLE{a1, + title = {csvtk - CSV/TSV Toolkit}, + author = {Wei Shen}, + url = {https://github.com/shenwei356/csvtk} + }</citation> + </citations> + </xml> +</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Animals_More.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Spots,Dog,1 +Fred,Dog,5 +Mittens,Cat,16 +Slippers,Cat,11 +Gravy,Cat,6 +Stripes,Zebra,7 +Muffin,Cat,7 +Earl,Dog,2 +Sir-Wags-A-Lot,Dog,44
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,12 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_converted.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,12 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,13 @@ +X,Y +1,2 +2,4 +4,8 +8,16 +16,32 +32,64 +64,128 +128,256 +256,512 +, +1024,2048 +2048,5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/XY_with_break.tabular Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,13 @@ +X Y +1 2 +2 4 +4 8 +8 16 +16 32 +32 64 +64 128 +128 256 +256 512 + +1024 2048 +2048 5096
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/another.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Name,Food,Age +Joe,Beets,33 +Steven,Eggplant,36 +Jacob,Kale,59 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_input.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution +ECo-1,2523,1000 +LPn-1,100,1000000 +LPn-2,4,1000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_mutate_output.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Culture Label,Cell Count,Dilution,CFU/ml +ECo-1,2523,1000,2523000 +LPn-1,100,1000000,100000000 +LPn-2,4,1000,4000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/blood_type.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Jacob AB Blue 160cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Patrick O Green 1cm +Chester O Blue 22cm +Kim B Teal 11cm +Sarah A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/collapsed.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name +AB- Fred +AB Darian; Jacob +O- Tim +O Adrian; Matthew; Patrick; Chester +B Kim +A Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,-,- +Adrian,-,- +Steven,-,- +Joe,-,Beets +Steven,-,Eggplant +Jacob,-,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/concat_2.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,11 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples +Fred,, +Adrian,, +Steven,, +Joe,,Beets +Steven,,Eggplant +Jacob,,Kale
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,1 @@ +X,Y,0.9960
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corr_2.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,1 @@ +X Y 0.9997
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-bob.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Bob,Protein,All of them,250cm,O-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-darian.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv-jack.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/csv.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType +Jack,Pineapple,Off White,165cm,O +Bob,Protein,All of them,250cm,O- +Darian,Potatos,Blue,175cm,O
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,33 @@ +Length,GC Content +100,50.00 +100,50.05 +100,49.95 +110,50.60 +105,50.50 +101,49.05 +99,49.95 +95,50.95 +100,50.00 +100,50.00 +90,66.00 +100,66.60 +100,65.05 +101,65.95 +101,65.55 +99,66.00 +95,66.05 +100,66.55 +105,65.55 +100,65.55 +110,66.55 +110,70.00 +100,70.00 +90,45.65 +99,45.60 +99,45.50 +95,45.20 +95,45.55 +100,45.55 +100,45.20 +100,45.55 +100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_2.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,33 @@ +Group,GC Content +A,50.00 +A,50.05 +A,49.95 +A,50.60 +A,50.50 +A,49.05 +A,49.95 +A,50.95 +A,50.00 +A,50.00 +B,66.00 +B,66.60 +B,65.05 +B,65.95 +B,65.55 +B,66.00 +B,66.05 +B,66.55 +B,65.55 +B,65.55 +B,66.55 +C,70.00 +C,70.00 +D,45.65 +D,45.60 +D,45.50 +D,45.20 +D,45.55 +D,45.55 +D,45.20 +D,45.55 +D,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Person,Height,Sport,Job +Fred,140cm,Diving,Accountant +Darian,175cm,Running,Student +Jake,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Blood_Type Favourite_Colour frequency +AB Blue 2 +O Green 2 +O Blue 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/frequency.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,8 @@ +Blood_Type Favourite_Colour frequency +AB- Orange 1 +AB Blue 2 +O- Green 1 +O Green 2 +O Blue 2 +B Teal 1 +A Blue 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gathered.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Favourite Food,Favourite Colour,Height,BloodType,1,2 +Pineapple,Off White,165cm,O,Person,Jack +Protein,All of them,250cm,O-,Person,Bob +Potatos,Blue,175cm,O,Person,Darian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,6 @@ +Test,A,B,C +D,,S,C +F,F,F,F +F,F,F, +TT,TT,TT,TT +Agh,Ol,As,TT \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illegal_collapse.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,5 @@ +C,Test +C,D +F,F +,F +TT,TT; Agh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,2 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Darian,Potatos,Blue,175cm,O,175cm,Running,Student
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/joined_filled.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,6 @@ +Person,Favourite Food,Favourite Colour,Height,BloodType,Height,Sport,Job +Jack,Pineapple,Off White,165cm,O,a,a,a +Bob,Protein,All of them,250cm,O-,a,a,a +Darian,Potatos,Blue,175cm,O,175cm,Running,Student +Fred,a,a,a,a,140cm,Diving,Accountant +Jake,a,a,a,a,188cm,Shotput,Moving Heavy Objects
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kv.txt Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,3 @@ +Key Value +Dog Big +Cat Small \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutate_removed.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,11 @@ +Blood_Type Favourite_Colour Height new_column +AB Blue 175cm Darian +AB- Orange 185cm +AB Blue 160cm +O Blue 2000cm +O- Green 20cm +O Green 140cm +O Green 1cm +O Blue 22cm +B Teal 11cm +A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mutated.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,11 @@ +Name Blood_Type Favourite_Colour Height new_column +Darian AB Blue 175cm Darian +Fred AB- Orange 185cm Fred +Jacob AB Blue 160cm Jacob +Adrian O Blue 2000cm Adrian +Tim O- Green 20cm Tim +Matthew O Green 140cm Matthew +Patrick O Green 1cm Patrick +Chester O Blue 22cm Chester +Kim B Teal 11cm Kim +Sarah A Blue 123cm Sarah
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/other.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Name,Sport,Job +Fred,Volleyball,Molecular Biologist +Adrian,Basketball,Computational Biologist +Steven,Football,Microbiologist \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/plot.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,33 @@ +Group,Length,GC Content +A,100,50.00 +A,100,50.05 +A,100,49.95 +A,110,50.60 +A,105,50.50 +A,101,49.05 +A,99,49.95 +A,95,50.95 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,100,65.05 +B,101,65.95 +B,101,65.55 +B,99,66.00 +B,95,66.05 +B,100,66.55 +B,105,65.55 +B,100,65.55 +B,110,66.55 +C,110,70.00 +C,100,70.00 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.55 +D,100,45.20 +D,100,45.55 +D,100,45.50 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,1-Dog +Mittens,2-Cat +Fuzzy,3-Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_2.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Big +Mittens,Small +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_input.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,4 @@ +Name,Animal +Shirley,Dog +Mittens,Cat +Fuzzy,Chinchilla
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,16 @@ +Group,Length,GC Content +A,100,50.00 +A,101,49.05 +A,100,50.00 +A,100,50.00 +B,90,66.00 +B,100,66.60 +B,101,65.55 +B,99,66.00 +B,105,65.55 +D,90,45.65 +D,99,45.60 +D,99,45.50 +D,95,45.20 +D,95,45.55 +D,100,45.20
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sampled_2.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,25 @@ +n,Group,Length,GC Content +1,A,100,50.00 +4,A,110,50.60 +5,A,105,50.50 +6,A,101,49.05 +9,A,100,50.00 +10,A,100,50.00 +11,B,90,66.00 +12,B,100,66.60 +13,B,100,65.05 +14,B,101,65.95 +15,B,101,65.55 +16,B,99,66.00 +19,B,105,65.55 +20,B,100,65.55 +22,C,110,70.00 +24,D,90,45.65 +25,D,99,45.60 +26,D,99,45.50 +27,D,95,45.20 +28,D,95,45.55 +29,D,100,45.55 +30,D,100,45.20 +31,D,100,45.55 +32,D,100,45.50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_1.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type 1 2 +AB- Fred NA +AB Darian Jacob +O- Tim NA +O Adrian Matthew +B Kim NA +A Sarah NA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/separated_2.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,7 @@ +Blood_Type Name 1 2 +AB- Fred Fred N/A +AB Darian; Jacob Darian Jacob +O- Tim Tim N/A +O Adrian; Matthew; Patrick; Chester Adrian Matthew; Patrick; Chester +B Kim Kim N/A +A Sarah Sarah N/A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_order.txt Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,3 @@ +Zebra +Cat +Dog
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_1.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,10 @@ +Name,Animal,Random_Number +Stripes,Zebra,7 +Mittens,Cat,16 +Slippers,Cat,11 +Muffin,Cat,7 +Gravy,Cat,6 +Sir-Wags-A-Lot,Dog,44 +Fred,Dog,5 +Earl,Dog,2 +Spots,Dog,1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.csv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,5 @@ +Name,Colour,Food +Eric,Blue,Apples +Darian,Blue,Pancakes +Daniel,Red,Apples +Emily,Blue,Apples \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/uniq.tsv Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,8 @@ +Name Blood_Type Favourite_Colour Height +Darian AB Blue 175cm +Fred AB- Orange 185cm +Adrian O Blue 2000cm +Tim O- Green 20cm +Matthew O Green 140cm +Kim B Teal 11cm +Sarah A Blue 123cm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/uniq.xml Tue May 19 17:16:58 2020 -0400 @@ -0,0 +1,173 @@ +<tool id="csvtk_uniq" name="csvtk-unique" version="@VERSION@+@GALAXY_VERSION@"> + <description> first values in column(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <expand macro="version_cmd" /> + <command detect_errors="exit_code"><![CDATA[ + +################### +## Start Command ## +################### + +csvtk uniq --num-cpus "\${GALAXY_SLOTS:-1}" + + ## Add additional flags as specified ## + ####################################### + $ignore_case + $global_param.illegal_rows + $global_param.empty_rows + $global_param.header + $global_param.lazy_quotes + + ## Set Tabular input/output flag if input is tabular ## + ####################################################### + #if $in_1.is_of_type("tabular"): + -t -T + #end if + + ## Set Input ## + ############### + '$in_1' + + ## Specify fields for unique ## + ############################### + -F -f '$column_text.in_text' + + ## If Only Wanted input columns ## + ################################## + #if $only_input_columns == 'Yes' + | csvtk cut -F -f '$column_text.in_text' + + #if $in_1.is_of_type("tabular"): + -t -T + #end if + #end if + + ## To output ## + ############### + > uniq + + ]]></command> + <inputs> + <expand macro="singular_input" /> + <expand macro="fields_input" /> + <expand macro="ignore_case" /> + <param name="only_input_columns" type="boolean" + truevalue="Yes" + falsevalue="No" + label="Keep only input column(s)" + help="All columns not specified as part of the input will be removed" + /> + <expand macro="global_parameters" /> + </inputs> + <outputs> + <data format_source="in_1" name="uniq" from_work_dir="uniq" label="${in_1.name} unique values in column(s) ${column_text.in_text}" /> + </outputs> + <tests> + <test> + <param name="in_1" value="blood_type.tsv" /> + <conditional name="column_text"> + <param name="select" 
value="string" /> + <param name="in_text" value="2,3" /> + </conditional> + <output name="uniq" file="uniq.tsv" ftype="tabular" /> + </test> + </tests> + <help><![CDATA[ + +Csvtk - Unique Help +------------------- + +Info +#### + +Csvtk unique keeps the first row containing each unique value in the column(s) specified. + +.. class:: warningmark + + Single quotes are not allowed in text inputs! + +@HELP_INPUT_DATA@ + + +Usage +##### + +To run csvtk-uniq, all you need is a valid (as defined above) CSV or TSV and the column(s) that you want to find unique values in. + +**Ex. Unique on one column:** + +Suppose you had a table as such + ++--------+--------+----------+ +| Name | Colour | Food | ++========+========+==========+ +| Eric | Blue | Apples | ++--------+--------+----------+ +| Darian | Blue | Pancakes | ++--------+--------+----------+ +| Daniel | Red | Apples | ++--------+--------+----------+ +| Frank | Blue | Apples | ++--------+--------+----------+ + +If you ran csvtk-unique on the Colour column, you would end up with the following table: + ++--------+--------+----------+ +| Name | Colour | Food | ++========+========+==========+ +| Eric | Blue | Apples | ++--------+--------+----------+ +| Daniel | Red | Apples | ++--------+--------+----------+ + +As only the first instance of the value in the column is kept. + +---- + +**Ex. 
Unique on more than one column:** + +Same input table + ++--------+--------+----------+ +| Name | Colour | Food | ++========+========+==========+ +| Eric | Blue | Apples | ++--------+--------+----------+ +| Darian | Blue | Pancakes | ++--------+--------+----------+ +| Daniel | Red | Apples | ++--------+--------+----------+ +| Frank | Blue | Apples | ++--------+--------+----------+ + +Now if you ran csvtk-unique on the Colour and Food column, you would end up with the following table: + ++--------+--------+----------+ +| Name | Colour | Food | ++========+========+==========+ +| Eric | Blue | Apples | ++--------+--------+----------+ +| Darian | Blue | Pancakes | ++--------+--------+----------+ +| Daniel | Red | Apples | ++--------+--------+----------+ + +Only the last row was not kept as there was already a unique pair found of "Blue" and "Apples". + +**Note:** If you want to only have the unique values of the columns and get rid of the additional columns, run a "cut program" to cut out the wanted columns +and then csvtk unique to only get the wanted values. + +---- + +@HELP_COLUMNS@ + + +@HELP_END_STATEMENT@ + + + ]]></help> + <expand macro="citations" /> +</tool> \ No newline at end of file