Mercurial > repos > bgruening > text_processing
view sorted_uniq.xml @ 21:86755160afbf draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit c2b1677d1c94433f777c2dc28ac8eec0a99cc6a7
author | bgruening |
---|---|
date | Fri, 16 Aug 2024 10:41:54 +0000 |
parents | 12615d397df7 |
children |
line wrap: on
line source
<tool id="tp_uniq_tool" name="Unique lines" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around the "uniq" command line utility.
        Reads one tabular dataset (expected to be sorted already) and writes
        the filtered or grouped lines to a single output dataset that inherits
        the format and metadata of the input.
    -->
    <description>assuming sorted input file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>
    <expand macro="requirements">
        <!-- sed is only needed to post-process the output of the counting mode -->
        <requirement type="package" version="4.8">sed</requirement>
    </expand>
    <version_command>uniq --version | head -n 1</version_command>
    <command>
<![CDATA[
        uniq
            ## 0 is falsy in Cheetah, so -f is only passed for a non-zero field count
            #if $skipfields:
                -f $skipfields
            #end if
            $ignorecase
            #if $grouping.grouping_select == 'yes':
                ## "No grouping at all" carries an empty value. Emitting a bare
                ## "--group=" hands uniq an empty METHOD, which it does not accept,
                ## so the flag is only added when a method was actually chosen.
                #if str($grouping.group) != '':
                    --group=$grouping.group
                #end if
            #else:
                $grouping.count
                $grouping.repeated
                $grouping.uniqueonly
            #end if
            '$infile'
            #if $grouping.grouping_select == 'no' and $grouping.count:
                ## count will print the count with spaces in front of the line and
                ## with a space (not a tab) after the number, we need to change that:
                ## strip the leading spaces, then turn the first remaining space into a tab
                | sed -e 's/^ *//' -e 's/ /\t/'
            #end if
            > '$outfile'
]]>
    </command>
    <inputs>
        <param name="infile" format="tabular" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />
        <!-- The two branches are exclusive: either delimit groups, or filter/count lines -->
        <conditional name="grouping">
            <param name="grouping_select" type="select" label="Do you want to group each unique group?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no">
                <param name="count" type="boolean" truevalue="-c" falsevalue="" label="Counting number of occurrences" help="Prefix lines by the number of occurrences. (-c)" />
                <param name="repeated" type="boolean" truevalue="-d" falsevalue="" label="Only print duplicate lines" help="(-d)"/>
                <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue="" label="Only print unique lines" help="(-u)" />
            </when>
            <when value="yes">
                <param name="group" type="select" label="Output all lines, and delimit each unique group" help="(--group)">
                    <!-- empty value: the command template omits the group flag entirely -->
                    <option value="">No grouping at all</option>
                    <option value="separate">Separate unique groups with a single delimiter</option>
                    <option value="prepend">Output a delimiter before each group of unique items</option>
                    <option value="append">Output a delimiter after each group of unique items</option>
                    <option value="both">Output a delimiter around each group of unique items</option>
                </param>
            </when>
        </conditional>
        <param name="ignorecase" type="boolean" truevalue="-i" falsevalue="" label="Ignore differences in case when comparing" help="(-i)"/>
        <!-- min="0" keeps a negative count from ever reaching the command line -->
        <param name="skipfields" type="integer" value="0" min="0" label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" />
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile"/>
    </outputs>
    <tests>
        <!-- counting mode, case-insensitive, unique lines only -->
        <test>
            <param name="infile" value="sorted_uniq1.tabular" />
            <param name="grouping_select" value="no"/>
            <param name="count" value="True"/>
            <param name="ignorecase" value="True"/>
            <param name="uniqueonly" value="True"/>
            <output name="outfile" file="sorted_uniq_results1.tabular" />
        </test>
        <!-- grouping mode with the "separate" method, case-insensitive -->
        <test>
            <param name="infile" value="sorted_uniq1.tabular" />
            <param name="ignorecase" value="True"/>
            <param name="grouping_select" value="yes"/>
            <param name="group" value="separate"/>
            <output name="outfile" file="sorted_uniq_results2.tabular" />
        </test>
    </tests>
    <help>
<![CDATA[
This tool takes a sorted file and looks for lines that are unique.

.. class:: warningmark

Please make sure your file is sorted, or else this tool will give you an erroneous output.

.. class:: infomark

You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".
]]>
    </help>
    <expand macro="citations" />
</tool>