# HG changeset patch
# User bgruening
# Date 1377506323 14400
# Node ID 4382d742830fcc22e67e7f3d4a9f9596d4cb03c2
Uploaded

diff -r 000000000000 -r 4382d742830f uniq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/uniq.py Mon Aug 26 04:38:43 2013 -0400
@@ -0,0 +1,67 @@
+r"""Galaxy wrapper that writes the unique lines of a tab-separated file.
+
+We only need this file because Galaxy does not understand the -t $'\t' term.
+Otherwise this would be the right XML-only solution:
+    sort -u
+        $ignore_case
+        $is_numeric
+        -t \$'\t'
+        #if $adv_opts.adv_opts_selector=="advanced":
+            -k$adv_opts.column_start,$adv_opts.column_end
+        #end if
+        -o $outfile
+        $input
+
+Usage:
+    uniq.py IGNORE_CASE IS_NUMERIC [COLUMN_START COLUMN_END] OUTFILE INPUT
+
+IGNORE_CASE and IS_NUMERIC are either options for sort or the string 'false'.
+"""
+import subprocess
+import sys
+
+
+def build_command(args):
+    """Return the ``sort`` argument list for the wrapper arguments *args*.
+
+    *args* is the command line without the script name.  With six or more
+    arguments the third and fourth are the first and last key column; with
+    four or five arguments no key is used.  Raises ValueError when fewer
+    than four arguments are given.
+    """
+    if len(args) < 4:
+        raise ValueError('expected at least 4 arguments, got %d' % len(args))
+
+    command = ['sort', '-u']
+    # Same option order as before: numeric flag first, then ignore-case.
+    for flag in (args[1], args[0]):
+        if flag.strip() != 'false':
+            # split() drops empty values and keeps multi-option strings working
+            command.extend(flag.split())
+
+    # An argument list (no shell) lets us hand sort a real tab character.
+    command.extend(['-t', '\t'])
+
+    if len(args) >= 6:
+        command.append('-k%s,%s' % (args[2], args[3]))
+        outfile, infile = args[4], args[5]
+    else:
+        # no advanced options selected
+        outfile, infile = args[2], args[3]
+
+    command.extend(['-o', outfile, infile])
+    return command
+
+
+def main(argv):
+    """Run sort for *argv* (including the script name); return its exit code."""
+    try:
+        command = build_command(argv[1:])
+    except ValueError as err:
+        sys.stderr.write('uniq.py: %s\n' % err)
+        return 2
+    return subprocess.call(command)
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff -r 000000000000 -r 4382d742830f uniq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/uniq.xml Mon Aug 26 04:38:43 2013 -0400
@@ -0,0 +1,75 @@
+
+ occurrences of each record
+
+ uniq.py
+ $ignore_case
+ $is_numeric
+ #if $adv_opts.adv_opts_selector=="advanced":
+ $adv_opts.column_start
+ $adv_opts.column_end
+ #end if
+ $outfile
+ $input
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. class:: infomark
+
+**Syntax**
+
+This tool returns all unique lines using the 'sort -u' command.
+
+-----
+
+.. class:: infomark
+
+The input file needs to be tab separated. Please convert your file if necessary.
+ +----- + +**Example** + +- Input file:: + + chr1 10 100 gene1 + chr1 105 200 gene2 + chr1 10 100 gene1 + chr2 10 100 gene4 + chr2 1000 1900 gene5 + chr3 15 1656 gene6 + chr2 10 100 gene4 + +- Unique lines will result in:: + + chr1 10 100 gene1 + chr1 105 200 gene2 + chr2 10 100 gene4 + chr2 1000 1900 gene5 + chr3 15 1656 gene6 + + + +