Mercurial > repos > bgruening > text_processing
changeset 0:5314e5d6f040 draft
Imported from capsule None
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ansi2html.sh Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,331 @@ +#!/bin/sh + +# Convert ANSI (terminal) colours and attributes to HTML + +# Author: +# http://www.pixelbeat.org/docs/terminal_colours/ +# Examples: +# ls -l --color=always | ansi2html.sh > ls.html +# git show --color | ansi2html.sh > last_change.html +# Generally one can use the `script` util to capture full terminal output. +# Changes: +# V0.1, 24 Apr 2008, Initial release +# V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com> +# Support `git diff --color` output by +# matching ANSI codes that specify only +# bold or background colour. +# P@draigBrady.com +# Support `ls --color` output by stripping +# redundant leading 0s from ANSI codes. +# Support `grep --color=always` by stripping +# unhandled ANSI codes (specifically ^[[K). +# V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ +# Remove cat -v usage which mangled non ascii input. +# Cleanup regular expressions used. +# Support other attributes like reverse, ... +# P@draigBrady.com +# Correctly nest <span> tags (even across lines). +# Add a command line option to use a dark background. +# Strip more terminal control codes. +# V0.4, 17 Sep 2009, P@draigBrady.com +# Handle codes with combined attributes and color. +# Handle isolated <bold> attributes with css. +# Strip more terminal control codes. 
+# V0.12, 12 Jul 2011 +# http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh + +if [ "$1" = "--version" ]; then + echo "0.12" && exit +fi + +if [ "$1" = "--help" ]; then + echo "This utility converts ANSI codes in data passed to stdin" >&2 + echo "It has 2 optional parameters:" >&2 + echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 + echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 + exit +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +if [ "$1" = "--palette=solarized" ]; then + # See http://ethanschoonover.com/solarized + P0=073642; P1=D30102; P2=859900; P3=B58900; + P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; + P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; + P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; + shift; +elif [ "$1" = "--palette=solarized-xterm" ]; then + # Above mapped onto the xterm 256 color palette + P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; + P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; + P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; + P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; + shift; +elif [ "$1" = "--palette=tango" ]; then + # Gnome default + P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; + P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; + P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; + P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; + shift; +elif [ "$1" = "--palette=xterm" ]; then + P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; + P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; + P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; + P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; + shift; +else # linux console + P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; + P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; + P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; + P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; + [ "$1" = "--palette=linux" ] && shift +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +echo -n "<html> +<head> +<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/> 
+<style type=\"text/css\"> +.ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; } +.ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; } +.ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; } +.ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; } +.ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; } +.ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; } +.ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; } +.ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; } +.ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; } +.ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; } +.ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; } +.ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; } +.ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; } +.ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; } +.ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; } +.ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; } +.eb8 { background-color: #$P8; } +.eb9 { background-color: #$P9; } +.eb10 { background-color: #$P10; } +.eb11 { background-color: #$P11; } +.eb12 { background-color: #$P12; } +.eb13 { background-color: #$P13; } +.eb14 { background-color: #$P14; } +.eb15 { background-color: #$P15; } +" + +# The default xterm 256 colour palette +for red in $(seq 0 5); do + for green in $(seq 0 5); do + for blue in $(seq 0 5); do + c=$((16 + ($red * 36) + ($green * 6) + $blue)) + r=$((($red * 40 + 55) * ($red > 0))) + g=$((($green * 40 + 55) * ($green > 0))) + b=$((($blue * 40 + 55) * ($blue > 0))) + printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b + printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b + done + done +done +for gray in $(seq 0 23); do + c=$(($gray+232)) + l=$(($gray*10 + 8)) + printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l + printf ".eb%d { background-color: 
#%2.2x%2.2x%2.2x; }\n" $c $l $l $l +done + +echo -n ' +.f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' } +.b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; } +.f9 > .bold,.bold > .f9, body.f9 > pre > .bold { + /* Bold is heavy black on white, or bright white + depending on the default background */ + color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`' + font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`' +} +.reverse { + /* CSS doesnt support swapping fg and bg colours unfortunately, + so just hardcode something that will look OK on all backgrounds. */ + '"color: #$P0; background-color: #$P7;"' +} +.underline { text-decoration: underline; } +.line-through { text-decoration: line-through; } +.blink { text-decoration: blink; } + +</style> +</head> + +<body class="f9 b9"> +<pre> +' + +p='\x1b\[' #shortcut to match escape codes +P="\(^[^°]*\)¡$p" #expression to match prepended codes below + +# Handle various xterm control sequences. +# See /usr/share/doc/xterm-*/ctlseqs.txt +sed " +s#\x1b[^\x1b]*\x1b\\\##g # strip anything between \e and ST +s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.) 
+ +#handle carriage returns +s#^.*\r\{1,\}\([^$]\)#\1# +s#\r\$## # strip trailing \r + +# strip other non SGR escape sequences +s#[\x07]##g +s#\x1b[]>=\][0-9;]*##g +s#\x1bP+.\{5\}##g +s#${p}[0-9;?]*[^0-9;?m]##g + +#remove backspace chars and what they're backspacing over +:rm_bs +s#[^\x08]\x08##g; t rm_bs +" | + +# Normalize the input before transformation +sed " +# escape HTML +s#\&#\&#g; s#>#\>#g; s#<#\<#g; s#\"#\"#g + +# normalize SGR codes a little + +# split 256 colors out and mark so that they're not +# recognised by the following 'split combined' line +:e +s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e +s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g; + +:c +s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c # split combined +s#${p}0\([0-7]\)#${p}\1#g #strip leading 0 +s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g #bold last (with clr) +s#${p}m#${p}0m#g #add leading 0 to norm + +# undo any 256 color marking +s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g; + +# map 16 color codes to color + bold +s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g; +s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g; + +# change 'reset' code to a single char, and prepend a single char to +# other codes so that we can easily do negative matching, as sed +# does not support look behind expressions etc. 
+s#°#\°#g; s#${p}0m#°#g +s#¡#\¡#g; s#${p}[0-9;]*m#¡&#g +" | + +# Convert SGR sequences to HTML +sed " +:ansi_to_span # replace ANSI codes with CSS classes +t ansi_to_span # hack so t commands below only apply to preceeding s cmd + +/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceeding code + +# common combinations to minimise html (optional) +s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count +s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count + +s#${P}1m#\1<span class=\"bold\">#; t span_count +s#${P}4m#\1<span class=\"underline\">#; t span_count +s#${P}5m#\1<span class=\"blink\">#; t span_count +s#${P}7m#\1<span class=\"reverse\">#; t span_count +s#${P}9m#\1<span class=\"line-through\">#; t span_count +s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#; t span_count +s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#; t span_count + +s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#; t span_count +s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#; t span_count + +s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes + +b # next line of input + +# add a corresponding span end flag +:span_count +x; s/^/s/; x +b ansi_to_span + +# replace 'reset code' with correct number of </span> tags +:span_end +x +/^s/ { + s/^.// + x + s#°#</span>°# + b span_end +} +x +s#°## +b ansi_to_span +" | + +# Convert alternative character set +# Note we convert here, as if we do at start we have to worry about avoiding +# conversion of SGR codes etc., whereas doing here we only have to +# avoid conversions of stuff between &...; or <...> +# +# Note we could use sed to do this based around: +# sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/' +# However that would be very awkward as we need to only conv some input. +# The basic scheme that we do in the python script below is: +# 1. enable transliterate once ¡ char seen +# 2. disable once µ char seen (may be on diff line to ¡) +# 3. 
never transliterate between &; or <> chars +sed " +# change 'smacs' and 'rmacs' to a single char so that we can easily do +# negative matching, as sed does not support look behind expressions etc. +# Note we don't use ° like above as that's part of the alternate charset. +s#\x1b(0#¡#g; +s#µ#\µ#g; s#\x1b(B#µ#g +" | +( +python -c " +# vim:fileencoding=utf8 + +import sys +import locale +encoding=locale.getpreferredencoding() + +old='abcdefghijklmnopqrstuvwxyz{}\`~' +new='▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·' +new=unicode(new, 'utf-8') +table=range(128) +for o,n in zip(old, new): table[ord(o)]=n + +(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3) + +state = STANDARD +last_mode = STANDARD +for c in unicode(sys.stdin.read(), encoding): + if state == HTML_TAG: + if c == '>': + state = last_mode + elif state == HTML_ENTITY: + if c == ';': + state = last_mode + else: + if c == '<': + state = HTML_TAG + elif c == '&': + state = HTML_ENTITY + elif c == u'¡' and state == STANDARD: + state = ALTERNATIVE + last_mode = ALTERNATIVE + continue + elif c == u'µ' and state == ALTERNATIVE: + state = STANDARD + last_mode = STANDARD + continue + elif state == ALTERNATIVE: + c = c.translate(table) + sys.stdout.write(c.encode(encoding)) +" 2>/dev/null || +sed 's/[¡µ]//g' # just strip aternative flag chars +) + +echo "</pre> +</body> +</html>"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/awk.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,124 @@ +<tool id="tp_awk_tool" name="Text reformatting" version="@BASE_VERSION@.0"> + <description>with awk</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="4.1.0">gnu_awk</requirement> + </expand> + <version_command>awk --version | head -n 1</version_command> + <command> +<![CDATA[ + awk + --sandbox + -v FS=' ' + -v OFS=' ' + --re-interval + -f "$awk_script" + "$infile" + > "$outfile" +]]> + </command> + <inputs> + <param name="infile" format="txt" type="data" label="File to process" /> + <param name="code" type="text" area="true" size="5x35" label="AWK Program" help=""> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <configfiles> + <configfile name="awk_script">$code</configfile> + </configfiles> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile"/> + </outputs> + <tests> + <test> + <param name="infile" value="awk1.txt" /> + <!-- commas are not allowed in a value field. Values with comma will be splitted --> + <param name="code" value='$2>0.5 { print $2*9"\t"$1 }' /> + <output name="outfile" file="awk_results1.txt" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool runs the unix **awk** command on the selected data file. + +.. class:: infomark + +**TIP:** + +This tool uses the **extended regular** expression syntax (not the perl syntax). +**\\d**, **\\w**, **\\s** etc. are **not** supported. 
+ + +**Further reading** + +- Awk by Example (http://www.ibm.com/developerworks/linux/library/l-awk1.html) +- Long AWK tutorial (http://www.grymoire.com/Unix/Awk.html) +- Learn AWK in 1 hour (http://www.selectorweb.com/awk.html) +- awk cheat-sheet (http://cbi.med.harvard.edu/people/peshkin/sb302/awk_cheatsheets.pdf) +- Collection of useful awk one-liners (http://student.northpark.edu/pemente/awk/awk1line.txt) + +----- + +**AWK programs** + +Most AWK programs consist of **patterns** (i.e. rules that match lines of text) and **actions** (i.e. commands to execute when a pattern matches a line). + +The basic form of an AWK program is:: + + pattern { action 1; action 2; action 3; } + + +**Pattern Examples** + +- **$2 == "chr3"** will match lines whose second column is the string 'chr3' +- **$5-$4>23** will match lines where subtracting the value of the fourth column from the value of the fifth column gives a value larger than 23. +- **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characters AG followed by any two characters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.) +- **$7 ~ /A{4}U/** will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.) +- **10000 < $4 && $4 < 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000 +- If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines). + + +**Action Examples** + +- **{ print }** or **{ print $0 }** will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'. +- **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line. +- **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth columns. 
(If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length). +- If no action part is specified (not even the curly brackets) - the default action is to print the entire line. + + +**AWK's Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string (but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item is matched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meanings: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cut.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,259 @@ +<tool id="tp_cut_tool" name="Cut" version="@BASE_VERSION@.0"> + <description>columns from a table</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>cut --version | head -n 1</version_command> + <command> +<![CDATA[ + cut + #if str($delimiter) != '': + -d"${delimiter}" + #end if + ${complement} + ${cut_type_options.cut_element} + '${cut_type_options.list}' + '${input}' + > '${output}' +]]> + </command> + <inputs> + <param name="input" format="txt" type="data" label="File to cut" /> + <param name="complement" type="select" label="Operation"> + <option value="">Keep</option> + <option value="--complement">Discard</option> + </param> + <param name="delimiter" type="select" label="Delimited by"> + <option value="">Tab</option> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + <conditional name="cut_type_options"> + <param name="cut_element" type="select" label="Cut by"> + <option value="-f">fields</option> + <option value="-c">characters</option> + <option value="-b">bytes</option> + </param> + <when value="-f"> + <param name="list" type="data_column" data_ref="input" multiple="true" label="List of Fields" help="(-f)" /> + </when> + <when value="-c"> + <param name="list" type="text" size="20" value="" label="List of characters" help="These will be kept/discarded (depending on 'operation'). <BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + <when value="-b"> + <param name="list" type="text" size="20" value="" label="List of Bytes" help="These will be kept/discarded (depending on 'operation'). 
<BR /> Examples: 1,3,4 or 2-5"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </when> + </conditional> + </inputs> + <outputs> + <!--<data format="tabular" name="output" />--> + <!-- WIP, not sure that will work with the complement option --> + <data format="tabular" name="output"> + <actions> + <conditional name="cut_type_options.cut_element"> + <!-- fields --> + <when value="-f"> + <conditional name="delimiter"> + <when value="T"> + <conditional name="input"> + <when datatype_isinstance="interval"> + <action type="format" default="tabular"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 1--> + <filter type="insert_column" column="0" value="interval"/> + <filter type="insert_column" ref="list" /> <!-- startCol --> + <filter type="insert_column" ref="list" /> <!-- endCol --> + + <filter type="multiple_splitter" column="1" separator=","/> + <filter type="column_strip" column="1"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="1" name="lower" /> + <filter type="param_value" column="1" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="1" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="1" cast="int" /> + + <filter type="multiple_splitter" column="2" separator=","/> + <filter type="column_strip" column="2"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="2" name="lower" /> + <filter type="param_value" column="2" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="2" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="2" cast="int" /> + + <filter type="multiple_splitter" column="3" separator=","/> + <filter type="column_strip" column="3"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="3" name="lower" /> + <filter type="param_value" column="3" 
value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="3" strip="c"/> <!-- get rid of c's --> + <filter type="boolean" column="3" cast="int" /> + + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + <filter type="metadata_value" ref="input" name="startCol" column="2" /> + <filter type="metadata_value" ref="input" name="endCol" column="3" /> + </option> + </action> + <conditional name="output"> + <when datatype_isinstance="interval"> + <action type="metadata" name="chromCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- chromCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="chromCol" column="1" /> + </option> + </action> + + <action type="metadata" name="startCol"> + <option type="from_param" name="list" column="0" offset="0"> <!-- startCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="startCol" column="1" /> + </option> + </action> + + <action type="metadata" name="endCol"> + <option 
type="from_param" name="list" column="0" offset="0"> <!-- endCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="endCol" column="1" /> + </option> + </action> + + <action type="metadata" name="nameCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- nameCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + <filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="nameCol" column="1" /> + </option> + </action> + + <action type="metadata" name="strandCol" default="0"> + <option type="from_param" name="list" column="0" offset="0"> <!-- strandCol is 0--> + <filter type="multiple_splitter" column="0" separator=","/> + <filter type="column_strip" column="0"/> <!-- get rid of all external whitespace --> + <filter type="string_function" column="0" name="lower" /> + <filter type="param_value" column="0" value="^c\d{1,}$" compare="re_search" keep="True"/> + <filter type="column_strip" column="0" strip="c"/> <!-- get rid of c's --> + <filter type="insert_column" value="1" iterate="True" column="0"/> + 
<filter type="boolean" column="1" cast="int" /> + <filter type="metadata_value" ref="input" name="strandCol" column="1" /> + </option> + </action> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + </when> + </conditional> + </actions> + </data> + </outputs> + <tests> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="1,3,4"/> + <param name="delimiter" value=""/> + <output name="output" file="cut_results1.txt"/> + </test> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="2" /> + <param name="delimiter" value="" /> + <param name="complement" value="--complement" /> + <output name="output" file="cut_results2.txt"/> + </test> + <test> + <param name="input" value="cut1.txt"/> + <param name="list" value="-3" /> + <param name="delimiter" value="" /> + <param name="cut_element" value="-c" /> + <output name="output" file="cut_results3.txt"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool runs the **cut** unix command, which extract or delete columns from a file. + +----- + +Field List Example: + +**1,3,7** - Cut specific fields/characters. + +**3-** - Cut from the third field/character to the end of the line. + +**2-5** - Cut from the second to the fifth field/character. + +**-8** - Cut from the first to the eight field/characters. + + +Input Example:: + + fruit color price weight + apple red 1.4 0.5 + orange orange 1.5 0.3 + banana yellow 0.9 0.3 + + +Output Example ( **Keeping fields 1,3,4** ):: + + fruit price weight + apple 1.4 0.5 + orange 1.5 0.3 + banana 0.9 0.3 + +Output Example ( **Discarding field 2** ):: + + fruit price weight + apple 1.4 0.5 + orange 1.5 0.3 + banana 0.9 0.3 + +Output Example ( **Keeping 3 characters** ):: + + fru + app + ora + ban + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/easyjoin Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,308 @@ +#!/usr/bin/env perl +## EASY Join - +## Join with automatic pre-sorting of both files +## Copyright (C) 2010 A. Gordon (gordon@cshl.edu) +## license: AGPLv3+ +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long qw(:config bundling no_ignore_case_always); +use File::Temp qw/tempfile/; +use POSIX qw(locale_h); + +sub show_help(); +sub show_version(); +sub show_examples(); +sub parse_commandline_options(); +sub sort_file($$$); +sub join_files($$); +sub cleanup_files(@); + + +my $PROGRAM="easyjoin"; +my $VERSION="0.6.1"; + +my $debug=undef; +my $HEADER=undef; +my $IGNORE_CASE=undef; +my $FIELD_SEP=undef; +my $FILE1_KEY_COLUMN=1; +my $FILE2_KEY_COLUMN=1; +my @OUTPUT_SPECIFIERS=(); +my $OUTPUT_FORMAT=undef; +my $EMPTY_FILLER=undef; +my $SORT_BUFFER_SIZE=undef; +my $SORT_TEMP_DIR=undef; +my $input_filename1; +my $input_filename2; + +## +## Program Start +## +$ENV{'LANG'}="C";## "C" locale is critical for sorting and joining correctly +parse_commandline_options(); +my (undef, $tmp_filename1) = tempfile(OPEN=>0); +my (undef, $tmp_filename2) = tempfile(OPEN=>0); +sort_file($input_filename1, $tmp_filename1, $FILE1_KEY_COLUMN); +sort_file($input_filename2, $tmp_filename2, $FILE2_KEY_COLUMN); +my $join_exit_code = join_files($tmp_filename1, $tmp_filename2); +cleanup_files($tmp_filename1, $tmp_filename2); +exit($join_exit_code); + +## +## Program end +## + + +sub show_help() +{ +print<<EOF; +${PROGRAM}: Wrapper for GNU join+sort, automaticalyl sorts files before joining them. + +Usage: $PROGRAM [OPTIONS] [JOIN-OPTIONS] [SORT-OPTIONS] FILE1 FILE2 + +OPTIONS: Options specific to this program: + + --header = Both input files have a header line as the first line. + The header line will be joined properly, without being sorted. + + --version = Print ${PROGRAM}'s version. + + --debug = Print debug messages (relating to ${PROGRAM}'s operation). 
+ + --help = Show this help screen. + + --example = Show usage examples. + + --all = Short-cut for: + -a 1 -a 2 -o auto -e . -t <TAB> + This will show all values (paired and unpared) from both files, + Automatically formatting the columns, and using TAB as field separator. + You can override the empty filler (-e X) on the command line. + + --allh = Short-cut for: + -a 1 -a 2 -o auto -e . -t <TAB> --header + Same as above, but will also respect the header line from both input files. + +JOIN-OPTIONS: + All of GNU join options are supported. + Run: + join --help + To see all possible joining options. + +SORT-OPTIONS: + The following options are supported for the intermediate sorting step: + + -S SIZE + --buffer-size SIZE = GNU sort's --buffer-size option. + + -T DIR + --temporary-directory DIR = GNU sort's --temporary-directory option. + + Run: + sort --help + To learn about these options. They might improve sorting performances for big files. + +FILE1 FILE2: + The two input files to be sorted, joined. + Unlike GNU join, joining STDIN is not supported. Both files must be real files. + + +NOTE About "--header" and "--auto-format": + The "--header" feature requires GNU coreutils version 8.6 or later. + The "-o auto" feature requires GNU coreutils version 8.10 or later. + +EOF + exit(0); +} + +sub show_version() +{ +print<<EOF; +$PROGRAM $VERSION +Copyright (C) 2010 A. Gordon (gordon\@cshl.edu) +License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html) + +To see the GNU's join version, run: + join --version +EOF + exit(0); +} + +sub show_examples() +{ +print<<EOF; +Example of joining two unsorted files (each file having a header line): + +\$ cat input1.txt +Fruit Color +Apple red +Banana yellow +Orange orange +Melon green + +\$ cat input2.txt +Fruit Price +Orange 7 +Avocado 8 +Apple 4 +Banana 3 + +\$ easyjoin -j 1 -a 1 -a 2 --header -e . -o auto input1.txt input2.txt +Fruit Color Price +Apple red 4 +Avocado . 8 +Banana yellow 3 +Melon green . 
+Orange orange 7 + +## A short-cut for all the options above: +\$ easyjoin --allh input1.txt input2.txt +Fruit Color Price +Apple red 4 +Avocado . 8 +Banana yellow 3 +Melon green . +Orange orange 7 + +EOF + exit(0); +} + +sub parse_commandline_options() +{ + ## + ## Parse command line + ## + my $rc = GetOptions( + "a=i" => sub { push @OUTPUT_SPECIFIERS, '-a', $_[1] }, + "e=s" => \$EMPTY_FILLER, + "ignore-case|i" => \$IGNORE_CASE, + "j=i" => sub { $FILE1_KEY_COLUMN = $_[1] ; $FILE2_KEY_COLUMN = $_[1] ; }, + "o=s" => \$OUTPUT_FORMAT, + "t=s" => \$FIELD_SEP, + "v=i" => sub { push @OUTPUT_SPECIFIERS, '-v', $_[1] }, + "1=i" => \$FILE1_KEY_COLUMN, + "2=i" => \$FILE2_KEY_COLUMN, + "debug" => \$debug, + "header" => \$HEADER, + "help" => \&show_help, + "version" => \&show_version, + "examples" => \&show_examples, + "buffer-size|S=s" => \$SORT_BUFFER_SIZE, + "temporary-directory|T=s" => \$SORT_TEMP_DIR, + "all" => sub { + push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2; + $FIELD_SEP = "\t"; + $OUTPUT_FORMAT = "auto"; + $EMPTY_FILLER = "." unless defined $EMPTY_FILLER; + }, + "allh" => sub { + push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2; + $FIELD_SEP = "\t"; + $OUTPUT_FORMAT = "auto"; + $HEADER=1; + $EMPTY_FILLER = "." unless defined $EMPTY_FILLER; + }, + ); + die "$PROGRAM: invalid command-line arguments.\n" unless $rc; + + ## We need two file names to join + my @INPUT_FILES = @ARGV; + die "$PROGRAM: missing operand: two file names to join\n" if (scalar(@INPUT_FILES)<2); + die "$PROGRAM: error: too many files specified (can only join two files)\n" if (scalar(@INPUT_FILES)>2); + die "$PROGRAM: error: input file can't be STDIN, please use a real file name.\n" if $INPUT_FILES[0] eq "-" || $INPUT_FILES[1] eq "-"; + die "$PROGRAM: error: input file 1 '" . $INPUT_FILES[0] . "' not found!" unless -e $INPUT_FILES[0]; + die "$PROGRAM: error: input file 2 '" . $INPUT_FILES[1] . "' not found!" 
unless -e $INPUT_FILES[1]; + + $input_filename1 = $INPUT_FILES[0]; + $input_filename2 = $INPUT_FILES[1]; +} + +sub sort_file($$$) +{ + my ($input_filename, $output_filename, $key_column) = @_; + + my @SORT_COMMAND; + push @SORT_COMMAND, $HEADER ? "./sort-header" : "sort" ; + push @SORT_COMMAND, "-f" if $IGNORE_CASE; + push @SORT_COMMAND, "-k${key_column},${key_column}" ; + push @SORT_COMMAND, "--buffer-size", $SORT_BUFFER_SIZE if $SORT_BUFFER_SIZE; + push @SORT_COMMAND, "--temporary-directory", $SORT_TEMP_DIR if $SORT_TEMP_DIR; + push @SORT_COMMAND, "--output", $output_filename; + push @SORT_COMMAND, "--debugheader" if $debug && $HEADER; + push @SORT_COMMAND, "-t", $FIELD_SEP if $FIELD_SEP; + push @SORT_COMMAND, $input_filename; + + if ($debug) { + warn "$PROGRAM: Running sort on '$input_filename' => '$output_filename'\n"; + warn "$PROGRAM: Sort command line:\n"; + print STDERR Dumper(\@SORT_COMMAND), "\n"; + } + + my $sort_exit_code=1; + system(@SORT_COMMAND); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'sort': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? & 127); + kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n"; + } + else { + $sort_exit_code = ($? 
>> 8); + } + die "$PROGRAM: Error: 'sort' process failed, exit code $sort_exit_code\n" if $sort_exit_code!=0; +} + +sub join_files($$) +{ + my ($file1, $file2) = @_; + + my @join_command = qw/join/; + push @join_command, "--header" if $HEADER; + push @join_command, "--ignore-case" if $IGNORE_CASE; + push @join_command, "-t", $FIELD_SEP if $FIELD_SEP; + push @join_command, "-1", $FILE1_KEY_COLUMN if $FILE1_KEY_COLUMN; + push @join_command, "-2", $FILE2_KEY_COLUMN if $FILE2_KEY_COLUMN; + push @join_command, "-e", $EMPTY_FILLER if defined $EMPTY_FILLER; + push @join_command, "-o", $OUTPUT_FORMAT if $OUTPUT_FORMAT; + push @join_command, @OUTPUT_SPECIFIERS; + push @join_command, $file1, $file2; + + if ($debug) { + warn "$PROGRAM: Running join on '$file1' and '$file2'\n"; + warn "$PROGRAM: join command line:\n"; + print STDERR Dumper(\@join_command), "\n"; + } + + my $join_exit_code=1; + system(@join_command); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'join': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? & 127); + kill 2, $$ if $signal == 2; ##if join was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'join' child-process died with signal $signal\n"; + } + else { + $join_exit_code = ($? >> 8); + } + return $join_exit_code; +} + +sub cleanup_files(@) +{ + my (@files) = @_; + + foreach my $file (@files) { + if ($debug) { + warn "$PROGRAM: debug mode, not deleting temporary file '$file'\n"; + } else { + my $count = unlink $file; + warn "$PROGRAM: Error: failed to delete temporary file '$file': $!\n" if ($count != 1); + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/easyjoin.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,115 @@ +<tool id="tp_easyjoin_tool" name="Join" version="@BASE_VERSION@.0"> + <description>two files</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="set_environment">TP_SCRIPT_PATH</requirement> + </expand> + <version_command>join --version | head -n 1</version_command> + <command> +<![CDATA[ + cp \$TP_SCRIPT_PATH/sort-header ./ && + chmod +x sort-header && + perl \$TP_SCRIPT_PATH/easyjoin + $jointype + -t ' ' + $header + -e '$empty_string_filler' + -o auto + $ignore_case + -1 '$column1' + -2 '$column2' + "$infile1" + "$infile2" + > '$output' +]]> + </command> + <inputs> + <param name="infile1" format="tabular" type="data" label="1st file" /> + <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="infile1" accept_default="true" /> + + <param name="infile2" format="txt" type="data" label="2nd File" /> + <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="infile2" accept_default="true" /> + + <param name="jointype" type="select" label="Output lines appearing in"> + <option value=" " selected="True">Both 1st & 2nd file.</option> + <option value="-v 1">1st but not in 2nd file. (-v 1)</option> + <option value="-v 2">2nd but not in 1st file. (-v 2)</option> + <option value="-a 1">Both 1st & 2nd file, plus unpairable lines from 1st file. (-a 1)</option> + <option value="-a 2">Both 1st & 2nd file, plus unpairable lines from 2nd file. (-a 2)</option> + <option value="-a 1 -a 2">All lines [-a 1 -a 2]</option> + <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option> + </param> + + <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" + label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." 
/> + <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" + label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." /> + <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="output" format_source="infile1" metadata_source="infile1"/> + </outputs> + <tests> + <test> + <param name="infile1" value="easyjoin1.tabular" /> + <param name="column1" value="1" /> + <param name="infile2" value="easyjoin2.tabular" /> + <param name="column2" value="1" /> + <param name="header" value="True" /> + <param name="jointype" value="-a 1 -a 2" /> + <output name="output" file="easyjoin_result1.tabular" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool joins two tabular files based on a common key column. + +----- + +**Example** + +**First file**:: + + Fruit Color + Apple red + Banana yellow + Orange orange + Melon green + +**Second File**:: + + Fruit Price + Orange 7 + Avocado 8 + Apple 4 + Banana 3 + +**Joining** both files, using **key column 1** and a **header line**, will return:: + + Fruit Color Price + Apple red 4 + Avocado . 8 + Banana yellow 3 + Melon green . + Orange orange 7 + +.. class:: infomark + + * Input files need not be sorted. + * The header line (**Fruit Color Price**) was joined and kept as first line. + * Missing values ( Avocado's color, missing from the first file ) are replaced with a period character. + +@REFERENCES@ +]]> + </help> +</tool>
#!/usr/bin/env perl
#
# find_and_replace - find and replace text in every line (or in a single
# whitespace-separated column) of a text file.
#
# The find/replace patterns are spliced into a generated Perl 's///'
# statement, which is then eval()'ed (see the long comment further down).
#
use strict;
use warnings;
use Getopt::Std;

sub parse_command_line();
sub build_regex_string();
sub escape_delimiter($);
sub usage();

my $input_file ;              # input file handle (named file, or STDIN)
my $output_file;              # output file handle (-o FILE, or STDOUT)
my $find_pattern ;            # FIND-PATTERN argument, as given by the user
my $replace_pattern ;         # REPLACE-PATTERN argument, as given by the user
my $find_complete_words ;     # -w
my $find_pattern_is_regex ;   # -r
my $find_in_specific_column ; # -c N (1-based), 0 = whole line
my $find_case_insensitive ;   # -i
my $replace_global ;          # -g
my $skip_first_line ;         # -s


##
## Program Start
##
usage() if @ARGV<2;
parse_command_line();
my $regex_string = build_regex_string() ;

# Allow first line to pass without filtering?
if ( $skip_first_line ) {
    my $line = <$input_file>;
    # An empty input has no first line; don't print an undefined value.
    print $output_file $line if defined $line ;
}


##
## Main loop
##

## I LOVE PERL (and hate it, at the same time...)
##
## So what's going on with the self-compiling perl code?
##
## 1. The program gets the find-pattern and the replace-pattern from the user (as strings).
## 2. If both the find-pattern and replace-pattern are simple strings (not regex),
##    it would be possible to pre-compile a regex (with qr//) and use it in a 's///'
## 3. If the find-pattern is a regex but the replace-pattern is a simple text string (without back-references)
##    it is still possible to pre-compile the regex and use it in a 's///'
## However,
## 4. If the replace-pattern contains back-references, pre-compiling is not possible.
##    (in perl, you can't precompile a substitute regex).
##    See these examples:
##    http://www.perlmonks.org/?node_id=84420
##    http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl
##
## The solution:
##    we build the regex string as valid perl code (in 'build_regex_string()', stored in $regex_string ),
##    Then eval() a new perl code that contains the substitution regex as inlined code.
##
## NOTE(security): with -r the user-supplied patterns become part of the
## eval()'ed program, so they can run arbitrary Perl (for instance through
## '@{[ ... ]}' in the replacement). Only trusted callers should be allowed
## to use -r; this cannot be fixed without changing the tool's contract.

my $perl_program ;
if ( $find_in_specific_column ) {
    # Find & replace in specific column.
    # The line is split on whitespace and re-joined with tabs; lines that
    # have fewer than N columns are dropped from the output.

    $perl_program = <<EOF;
    while ( <STDIN> ) {
        chomp ;
        my \@columns = split ;

        #not enough columns in this line - skip it
        next if ( \@columns < $find_in_specific_column ) ;

        \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ;

        print STDOUT join("\t", \@columns), "\n" ;
    }
EOF

} else {
    # Find & replace the entire line
    $perl_program = <<EOF;
        while ( <STDIN> ) {
            $regex_string ;
            print STDOUT;
        }
EOF
}


# The dynamic perl code reads from STDIN and writes to STDOUT,
# so connect these handles (if the user didn't specify input / output
# file names, these might already be STDIN/OUT, so the whole thing could be a no-op).
*STDIN = $input_file ;
*STDOUT = $output_file ;
eval $perl_program ;

# A malformed pattern makes the generated code fail to compile. Without this
# check the program would silently produce no output and exit successfully.
die "$0: Error: find/replace failed (invalid pattern?): $@" if $@ ;

# Flush explicitly so that write errors (disk full, closed pipe) are reported.
close(STDOUT) or die "$0: Error: failed to write output: $!\n" ;


##
## Program end
##


# Parse the command line into the file-level option variables and open the
# input/output handles. Dies with a message on any invalid argument.
sub parse_command_line()
{
    my %opts ;
    getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n";

    die "$0: missing Find-Pattern argument\n" if (@ARGV==0);
    $find_pattern = $ARGV[0];
    die "$0: missing Replace-Pattern argument\n" if (@ARGV==1);
    $replace_pattern = $ARGV[1];

    $find_complete_words = ( exists $opts{w} ) ;
    $find_case_insensitive = ( exists $opts{i} ) ;
    $skip_first_line = ( exists $opts{s} ) ;
    $find_pattern_is_regex = ( exists $opts{r} ) ;
    $replace_global = ( exists $opts{g} ) ;

    # Search in specific column ?
    if ( defined $opts{c} ) {
        $find_in_specific_column = $opts{c};

        die "$0: invalid column number ($find_in_specific_column).\n"
            unless $find_in_specific_column =~ /^\d+$/ ;

        die "$0: invalid column number ($find_in_specific_column).\n"
            if $find_in_specific_column <= 0;
    }
    else {
        $find_in_specific_column = 0 ;
    }

    # Output File specified (instead of STDOUT) ?
    # Three-argument open: the file name is never interpreted as a mode/pipe.
    if ( defined $opts{o} ) {
        my $filename = $opts{o};
        open $output_file, '>', $filename or die "$0: Failed to create output file '$filename': $!\n" ;
    } else {
        $output_file = *STDOUT ;
    }


    # Input file Specified (instead of STDIN) ?
    if ( @ARGV>2 ) {
        my $filename = $ARGV[2];
        open $input_file, '<', $filename or die "$0: Failed to open input file '$filename': $!\n" ;
    } else {
        $input_file = *STDIN;
    }
}

# Backslash-escape every '/' that is not already escaped, so that a
# user-supplied regex/replacement cannot terminate the generated s/// early.
# A '/' preceded by an even number of backslashes counts as unescaped.
sub escape_delimiter($)
{
    my ($text) = @_;
    $text =~ s{(?<!\\)((?:\\\\)*)/}{$1\\/}g;
    return $text;
}

# Build the text of the Perl substitution statement ("s/FIND/REPLACE/flags")
# from the parsed options. Returns the statement as a string.
sub build_regex_string()
{
    my $find_string ;
    my $replace_string ;

    if ( $find_pattern_is_regex ) {
        $find_string = escape_delimiter( $find_pattern ) ;
        $replace_string = escape_delimiter( $replace_pattern ) ;
    } else {
        # Verbatim text: quotemeta also escapes '/', '$' and '@'.
        $find_string = quotemeta $find_pattern ;
        $replace_string = quotemeta $replace_pattern;
    }

    if ( $find_complete_words ) {
        # Non-capturing group: a capturing one would shift the numbering of
        # the user's own groups, so that '$1' no longer meant their first group.
        $find_string = "\\b(?:$find_string)\\b";
    }

    my $regex_string = "s/$find_string/$replace_string/";

    $regex_string .= "i" if ( $find_case_insensitive );
    $regex_string .= "g" if ( $replace_global ) ;


    return $regex_string;
}

# Print the usage screen to STDOUT and exit.
sub usage()
{
print <<EOF;

Find and Replace
Copyright (C) 2009 - by A. Gordon ( gordon at cshl dot edu )

Usage: $0 [-o OUTPUT] [-g] [-r] [-w] [-i] [-c N] [-s] FIND-PATTERN REPLACE-PATTERN [INPUT-FILE]

   -g   - Global replace - replace all occurrences in line/column.
          Default - replace just the first instance.
   -w   - search for complete words (not partial sub-strings).
   -i   - case insensitive search.
   -c N - check only column N, instead of entire line (line split by whitespace).
   -s   - skip first line (don't replace anything in it)
   -r   - FIND-PATTERN and REPLACE-PATTERN are perl regular expression,
          usable inside a 's///' statement.
          By default, they are used as verbatim text strings.
   -o OUT - specify output file (default = STDOUT).
   INPUT-FILE - (optional) read from file (default = from STDIN).


EOF

    exit;
}
<tool id="tp_find_and_replace" name="Replace" version="@BASE_VERSION@.0">
    <description>parts of text</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Runs the bundled 'find_and_replace' perl script. Every user-supplied value is
         single-quoted for the shell; the sanitizers below strip single quotes from the
         free-text patterns so the quoting cannot be broken out of. -->
    <command interpreter="perl">
<![CDATA[
        find_and_replace
            #if $searchwhere.searchwhere_select == "column":
                -c $searchwhere.column
            #end if
            -o '$outfile'
            $caseinsensitive
            $wholewords
            $skip_first_line
            $is_regex
            '$find_pattern'
            '$replace_pattern'
            '$infile'
]]>
    </command>
    <inputs>
        <param name="infile" format="txt" type="data" label="File to process" />
        <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes / / ) " >
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
            </sanitizer>
        </param>
        <param name="replace_pattern" type="text" size="20" label="Replace with"
            help="Use simple text, or $&amp; (dollar-ampersand) and $1 $2 $3 to refer to matched text. See examples below." >
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
            </sanitizer>
        </param>
        <param name="is_regex" type="boolean" checked="false" truevalue="-r" falsevalue=""
            label="Find-Pattern is a regular expression" help="see help section for details." />

        <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue=""
            label="Case-Insensitive search" help="" />

        <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue=""
            label="Find whole-words" help="ignore partial matches (e.g. 'apple' will not match 'snapple')" />

        <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue=""
            label="Ignore first line" help="Select this option if the first line contains column headers. Text in the line will not be replaced. " />

        <conditional name="searchwhere">
            <param name="searchwhere_select" type="select" label="Find and Replace text in">
                <option value="line" selected="true">entire line</option>
                <option value="column">specific column</option>
            </param>
            <when value="line" />
            <when value="column">
                <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format_source="infile" name="outfile" metadata_source="infile" />
    </outputs>
    <tests>
        <test>
            <param name="infile" value="find_and_replace1.txt" />
            <param name="find_pattern" value="day" />
            <param name="replace_pattern" value="great day" />
            <param name="is_regex" value="False" />
            <param name="caseinsensitive" value="False" />
            <param name="wholewords" value="True" />
            <output name="outfile" file="find_and_replace_results1.txt" />
        </test>
        <test>
            <param name="infile" value="find_and_replace2.txt" />
            <param name="find_pattern" value="^chr" />
            <param name="replace_pattern" value="" />
            <param name="is_regex" value="True" />
            <param name="caseinsensitive" value="False" />
            <param name="wholewords" value="False" />
            <param name="searchwhere_select" value="column" />
            <param name="column" value="3" />
            <output name="outfile" file="find_and_replace_results2.txt" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool finds & replaces text in an input dataset.

.. class:: infomark

The **pattern to find** can be a simple text string, or a perl **regular expression** string (depending on *pattern is a regex* check-box).

.. class:: infomark

When using regular expressions, the **replace pattern** can contain back-references ( e.g. \\1 )

.. class:: infomark

This tool uses Perl regular expression syntax.

-----

**Examples of *regular-expression* Find Patterns**

- **HELLO** The word 'HELLO' (case sensitive).
- **AG.T** The letters A,G followed by any single character, followed by the letter T.
- **A{4,}** Four or more consecutive A's.
- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character.
- **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern.


**Examples of Replace Patterns**

- **WORLD** The word 'WORLD' will be placed wherever the find pattern was found.
- **FOO-$&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **$&** (dollar-ampersand) represents the matched find pattern.
- **$1** The text which matched the first parenthesis in the Find Pattern.


-----

**Example 1**

**Find Pattern:** HELLO
**Replace Pattern:** WORLD
**Regular Expression:** no
**Replace what:** entire line

Every time the word HELLO is found, it will be replaced with the word WORLD.

-----

**Example 2**

**Find Pattern:** ^chr
**Replace Pattern:** (empty)
**Regular Expression:** yes
**Replace what:** column 11

If column 11 (of every line) begins with the letters 'chr', they will be removed. Effectively, it'll turn "chr4" into "4" and "chrXHet" into "XHet"


-----

**Perl's Regular Expression Syntax**

The Find & Replace tool searches the data for text matching the given pattern. A Regular Expression is a pattern describing a certain amount of text.

- **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
- **^** matches the beginning of a string (but not an internal line).
- **(** .. **)** groups a particular pattern.
- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.

  - **{n}** The preceding item is matched exactly n times.
  - **{n,}** The preceding item is matched n or more times.
  - **{n,m}** The preceding item is matched at least n times but not more than m times.

- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
- **.** Matches any single character except a newline.
- ***** The preceding item will be matched zero or more times.
- **?** The preceding item is optional and matched at most once.
- **+** The preceding item will be matched one or more times.
- **^** has two meanings:

  - matches the beginning of a line or string.
  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.

- **$** matches the end of a line or string.
- **\|** Separates alternate possibilities.
- **\\d** matches a single digit
- **\\w** matches a single letter or digit or an underscore.
- **\\s** matches a single white-space (space or tabs).

@REFERENCES@
]]>
    </help>
</tool>
<tool id="tp_grep_tool" name="Search in textfiles" version="@BASE_VERSION@.0">
    <description>(grep)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="2.14">gnu_grep</requirement>
        <requirement type="set_environment">TP_SCRIPT_PATH</requirement>
    </expand>
    <version_command>grep --version | head -n 1</version_command>
    <!-- The user's regular expression is passed in SINGLE quotes: the sanitizer on
         'url_paste' strips single quotes, so the value cannot terminate the quoting.
         Inside double quotes the shell would still expand $(...), backticks, '$VAR'
         and '\', which both corrupts patterns and allows command injection. -->
    <command>
<![CDATA[
        #if str($color) == "COLOR":
            GREP_COLOR='1;34'
            grep
                --color=always
                -P
                -A $lines_after
                -B $lines_before
                $invert
                $case_sensitive
                -- '${url_paste}'
                '${infile}' | \$TP_SCRIPT_PATH/ansi2html.sh > '${output}'
        #else:
            grep
                -P
                -A $lines_after
                -B $lines_before
                $invert
                $case_sensitive
                -- '${url_paste}'
                '${infile}' | grep -v "^--$" > '${output}'
        #end if

        ##grep_wrapper.sh '$infile' '$output' '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive
]]>
    </command>
    <inputs>
        <param name="infile" format="txt" type="data" label="Select lines from" />

        <param name="invert" type="select" label="that">
            <option value="">Match</option>
            <option value="-v">Don't Match</option>
        </param>

        <param name="url_paste" type="text" size="40" label="Regular Expression" help="See below for more details">
            <sanitizer>
                <valid initial="string.printable">
                    <remove value="'"/>
                </valid>
            </sanitizer>
        </param>

        <param name="case_sensitive" type="select" label="Match type" help="(-i)">
            <option value="-i">case insensitive</option>
            <option value="">case sensitive</option>
        </param>
        <param name="lines_before" type="integer" value="0"
            label="Show lines preceding the matched line" help="leave it at zero unless you know what you're doing. (-B)" />
        <param name="lines_after" type="integer" value="0"
            label="Show lines trailing the matched line" help="leave it at zero unless you know what you're doing. (-A)" />
        <param name="color" type="select" label="Output">
            <option value="NOCOLOR">text file (for further processing)</option>
            <option value="COLOR">Highlighted HTML (for easier viewing)</option>
        </param>

    </inputs>
    <outputs>
        <data name="output" format_source="infile" metadata_source="infile">
            <change_format>
                <when input="color" value="COLOR" format="html"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <test>
            <!-- grep a FASTA file for sequences with specific motif -->
            <param name="infile" value="grep1.txt" />
            <param name="case_sensitive" value="case sensitive" />
            <param name="invert" value="" />
            <param name="url_paste" value="AA.{2}GT" />
            <param name="lines_before" value="1" />
            <param name="lines_after" value="0" />
            <param name="color" value="NOCOLOR" />
            <output name="output" file="grep_results1.txt" />
        </test>
        <test>
            <!-- grep a FASTA file for sequences with specific motif -
                 show highlighted output -->
            <param name="infile" value="grep1.txt" />
            <param name="case_sensitive" value="case sensitive" />
            <param name="invert" value="" />
            <param name="url_paste" value="AA.{2}GT" />
            <param name="lines_before" value="0" />
            <param name="lines_after" value="0" />
            <param name="color" value="COLOR" />
            <output name="output" file="grep_results2.html" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool runs the unix **grep** command on the selected data file.

.. class:: infomark

**TIP:** This tool uses the **perl** regular expression syntax (same as running 'grep -P'). This is **NOT** the POSIX or POSIX-extended syntax (unlike the awk/sed tools).


**Further reading**

- Wikipedia's Regular Expression page (http://en.wikipedia.org/wiki/Regular_expression)
- Regular Expressions cheat-sheet (PDF) (http://www.addedbytes.com/cheat-sheets/download/regular-expressions-cheat-sheet-v2.pdf)
- Grep Tutorial (http://www.panix.com/~elflord/unix/grep.html)

-----

**Grep Examples**

- **AGC.AAT** would match lines with AGC followed by any character, followed by AAT (e.g. **AGCQAAT**, **AGCPAAT**, **AGCwAAT**)
- **C{2,5}AGC** would match lines with 2 to 5 consecutive Cs followed by AGC
- **TTT.{4,10}AAA** would match lines with 3 Ts, followed by 4 to 10 characters (any characters), followed by 3 As.
- **^chr([0-9A-Za-z])+** would match lines that begin with chromosomes, such as lines in a BED format file.
- **(ACGT){1,5}** would match at least 1 "ACGT" and at most 5 "ACGT" consecutively.
- **hsa|mmu** would match lines containing "hsa" or "mmu" (or both).

-----

**Regular Expression Syntax**

The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text.

- **( ) { } [ ] . * ? + \\ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for.
- **^** matches the beginning of a string (but not an internal line).
- **\\d** matches a digit, same as [0-9].
- **\\D** matches a non-digit.
- **\\s** matches a whitespace character.
- **\\S** matches anything BUT a whitespace.
- **\\t** matches a tab.
- **\\w** matches an alphanumeric character ( A to Z, 0 to 9 and underscore )
- **\\W** matches anything but an alphanumeric character.
- **(** .. **)** groups a particular pattern.
- **\\Z** matches the end of a string (but not an internal line).
- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern.

  - **{n}** The preceding item is matched exactly n times.
  - **{n,}** The preceding item is matched n or more times.
  - **{n,m}** The preceding item is matched at least n times but not more than m times.

- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**.
- **.** Matches any single character except a newline.
- ***** The preceding item will be matched zero or more times.
- **?** The preceding item is optional and matched at most once.
- **+** The preceding item will be matched one or more times.
- **^** has two meanings:

  - matches the beginning of a line or string.
  - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets.

- **$** matches the end of a line or string.
- **\|** Separates alternate possibilities.

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/head.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,62 @@ +<tool id="tp_head_tool" name="Select first" version="@BASE_VERSION@.0"> + <description>lines from a dataset (head)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>head --version | head -n 1</version_command> + <command> +<![CDATA[ + head + --lines + $complement$count + '${infile}' + > '${outfile}' +]]> + </command> + <inputs> + <param name="infile" type="data" format="txt" label="File to select" /> + <param name="complement" type="select" label="Operation"> + <option value="">Keep first lines</option> + <option value="-">Remove last lines</option> + </param> + <param name="count" type="integer" size="5" value="10" + label="Number of lines" help="These will be kept/discarded depending on 'operation'. (--lines)" /> + </inputs> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile"/> + </outputs> + <tests> + <test> + <param name="count" value="10"/> + <param name="infile" value="1.bed"/> + <output name="outfile" file="head_results1.bed"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool outputs the specified number of lines from the **beginning** of a dataset. + +----- + +**Example** + +Selecting 2 lines from this:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + chr7 56761 56781 D17003_CTCF_R4 220 + + chr7 56772 56792 D17003_CTCF_R7 372 + + chr7 56775 56795 D17003_CTCF_R4 207 + + +will produce:: + + chr7 56632 56652 D17003_CTCF_R6 310 + + chr7 56736 56756 D17003_CTCF_R7 354 + + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="8.22">gnu_coreutils</requirement> + <yield/> + </requirements> + </xml> + <token name="@BASE_VERSION@">1.0</token> + <xml name="stdio"> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <!-- In case the return code has not been set properly check stderr too --> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + </xml> + <token name="@REFERENCES@"> +<![CDATA[ +------ + +**Citation** + +If you use this tool in Galaxy, please cite: + +Bjoern A. Gruening (2014), `Galaxy wrapper <https://github.com/bgruening/galaxytools>`_ + +Assaf Gordon (gordon <at> cshl dot edu) +]]> + </token> + <xml name="citations"> + <citations> + <yield /> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multijoin Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,321 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Getopt::Long qw(:config no_ignore_case); +use Data::Dumper; +use Carp; +use File::Basename; +use Sort::Key::Natural qw(natsort); + +my $version = "0.1.1"; +my $field_sep = "\t"; +my $key_column; +my @values_columns; +my $max_value_column; +my @input_files; +my $input_headers ; +my $output_headers; +my $filler = "0"; +my $filler_string ; +my $ignore_duplicates; +my $debug = 0 ; +my %input_headers; +my $have_file_labels; +my %file_labels; + +sub parse_command_line_parameters(); +sub show_help(); +sub read_input_file($); +sub print_combined_data(); +sub sanitize_filename($); +sub print_output_header(); +sub show_examples(); + +## +## Program Start +## + +parse_command_line_parameters(); + +my %data; +foreach my $file (@input_files) { + read_input_file($file); +} +#print STDERR Dumper(\%input_headers),"\n"; +#print STDERR Dumper(\%data) if $debug; +print_output_header() if $output_headers; +print_combined_data(); + + +## +## Program End +## +sub print_output_header() +{ + my @output = ("key"); + foreach my $file ( @input_files ) { + foreach my $column ( @values_columns ) { + my $column_name = ( exists $input_headers{$file}->{$column} ) ? + $input_headers{$file}->{$column} : + "V$column" ; + + push @output, $file_labels{$file} . "_" . $column_name; + } + } + print join($field_sep,@output),"\n" + or die "Output error: can't write output line: $!\n"; +} + +sub print_combined_data() +{ + my @keys = natsort keys %data ; + + foreach my $key ( @keys ) { + my @outputs; + + foreach my $file (@input_files) { + push @outputs, + (exists $data{$key}->{$file}) ? 
$data{$key}->{$file} : $filler_string; + } + + print join($field_sep,$key,@outputs),"\n" + or die "Output error: can't write output line: $!\n"; + } +} + +## Derive a label-safe ID from a file name: take the basename, drop the last +## extension, trim leading/trailing runs of characters other than word +## characters, '.' and '-', and replace the remaining runs with '_'. +sub sanitize_filename($) +{ + my ($filename) = shift or croak "missing file name"; + my $file_ID = basename($filename); + $file_ID =~ s/\.\w+$//; # remove extension + $file_ID =~ s/^[^\w\.\-]+//; + $file_ID =~ s/[^\w\.\-]+$//; + $file_ID =~ s/[^\w\.\-]+/_/g; # sanitize bad characters + return $file_ID; +} + +## Load one input file into %data (key => file name => joined value columns). +## With $input_headers set, the first line is consumed as a header and the +## names of the value columns are stored in %input_headers. +## Dies on unreadable or empty-header files, on lines with too few columns, +## and on duplicated keys (unless $ignore_duplicates is set). +sub read_input_file($) +{ + my ($filename) = shift or croak "Missing input file name"; + + my @value_indexes = map { $_-1 } @values_columns; #zero-based indexes for value columns + + ## lexical handle instead of the package-wide bareword FILE + open my $fh, "<", $filename + or die "Error: can't open file '$filename': $!\n"; + + ## Read file's header + if ($input_headers) { + my $line = <$fh>; + ## an empty file has no header line; fail with a clear message instead of chomp'ing undef + die "Input error: file '$filename' is empty (expected a header line)\n" unless defined $line; + chomp $line; + my @fields = split $field_sep, $line; + + my $num_input_fields = scalar(@fields); + die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ; + + foreach my $col (@values_columns) { + $input_headers{$filename}->{$col} = $fields[$col-1] ; + } + } + + + ## Read file's data + while ( my $line = <$fh> ) { + chomp $line; + my @fields = split $field_sep, $line; + + my $num_input_fields = scalar(@fields); + die "Input error: file '$filename' line $. doesn't have enough columns (key column = $key_column, line has only $num_input_fields columns)\n" if $num_input_fields < $key_column ; + die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ; + + + my $key = $fields[$key_column-1]; + my $value = join($field_sep, @fields[@value_indexes]); + + die "Input error: file '$filename' line $. 
have duplicated key '$key'.\n" + if (exists $data{$key}->{$filename} && !$ignore_duplicates) ; + $data{$key}->{$filename} = $value; + } + ## the file was only read, so the error must not claim a failed write + close $fh + or die "Error: can't close file '$filename': $!\n"; +} + +## Parse the command line: read the options with GetOptions, validate the key +## and value column numbers, build $filler_string, and fill @input_files and +## %file_labels from the remaining arguments (file/label pairs with --labels, +## plain file names otherwise). Dies with an "Error: ..." message on bad input. +sub parse_command_line_parameters() +{ + my $values_columns_string; + + my $rc = GetOptions("help" => \&show_help, + "key|k=i" => \$key_column, + "values|v=s" => \$values_columns_string, + "t=s" => \$field_sep, + "in-header" => \$input_headers, + "out-header|h" => \$output_headers, + ## --headers is the long form that the help screen documents for -H + "headers|H" => sub { $input_headers = 1 ; $output_headers = 1 ; }, + "ignore-dups" => \$ignore_duplicates, + "filler|f=s" => \$filler, + "examples" => \&show_examples, + "labels" => \$have_file_labels, + ); + die "Error: invalid command-line parameters.\n" unless $rc; + + die "Error: missing key column. use --key N. see --help for more details.\n" unless defined $key_column; + die "Error: Invalid key column ($key_column). Must be bigger than zero. see --help for more details.\n" if $key_column <= 0 ; + + die "Error: missing values column. use --values V1,V2,Vn. See --help for more details.\n" unless defined $values_columns_string; + @values_columns = split(/\s*,\s*/, $values_columns_string); + + die "Error: missing values column. use --values N,N,N. see --help for more details.\n" unless scalar(@values_columns)>0; + foreach my $v (@values_columns) { + die "Error: invalid value column ($v), please use only numbers>=1. see --help for more details.\n" + unless $v =~ /^\d+$/ && $v>=1; + + $max_value_column = $v unless defined $max_value_column && $max_value_column>$v; + } + + $filler_string = join($field_sep, map { $filler } @values_columns); + + + if ($have_file_labels) { + ## have file labels - each pair of parameters is a file/label pair. 
+ die "Error: missing input files and labels\n" if scalar(@ARGV)==0; + die "Error: when using --labels, a pair of file names + labels is required (got odd number of arguments)\n" unless scalar(@ARGV)%2==0; + + while (@ARGV) { + my $filename = shift @ARGV; + my $label = shift @ARGV; + $label =~ s/^[^\.\w\-]+//; + $label =~ s/[^\.\w\-]+$//g; + $label =~ s/[^\.\w\-]+/_/g; + + my $file_ID = sanitize_filename($filename); + $file_labels{$filename} = $label; + push @input_files, $filename; + } + } else { + ## no file labels - the rest of the arguments are just file names; + @input_files = @ARGV; + die "Error: missing input files\n" if scalar(@input_files)==0; + die "Error: need more than one input file to join.\n" if scalar(@input_files)==1; + + foreach my $file (@input_files) { + my $file_ID = sanitize_filename($file); + $file_labels{$file} = $file_ID; + } + } + +} + +## Print the usage screen and exit with status 0. +sub show_help() +{ + print<<EOF; +Multi-File join, version $version +Copyright (C) 2012 - A. Gordon (gordon at cshl dot edu) +License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html) + +Usage: + multijoin [OPTIONS] -k N -v V1,V2,Vn,.. FILE1 FILE2 ... FILEn + +Options: + + --help This helpful help screen. + + -k N + --key N Use column N as key column. + + -v V1,V2,Vn + --values V1,V2,Vn + Use columns V1,V2,Vn as value columns - those will be joined + According to the Key column. + Multiple columns can be specified. + + -t SEP Use SEP as field separator character (default: tab). + + -h + --out-header Add a header line to the output file. + + --in-header The input files have a header line. + The first line will not be joined. + if '--out-header' is also used, the output column headers will + be constructed based on the input header column names. + + -H + --headers Same as '--in-header --out-header' combined. + + --ignore-dups Ignore duplicated keys (within a file). + By default, duplicated keys cause an error. + + -f X + --filler X Fill missing values with X. 
+ (Default: '$filler'). + + --labels When printing output headers with '-h', instead of using the file name, + use specific labels. + Each file name must be followed by a name. + + example (without labels): + \$ multijoin -h -k 1 -v 2 A.TXT B.TXT C.TXT + + example (with labels): + \$ multijoin -h --labels -k 1 -v 2 A.TXT Sample1 B.TXT SampleB C.TXT SampleC + + --examples Show detailed examples. + +EOF + exit(0); +} + +## Print a worked example (three input files joined on column 4) and exit with status 0. +sub show_examples() +{ + print<<EOF; + +To join three files, based on the 4th column, and keeping the 7th,8th,9th columns: + +\$ head *.txt +==> AAA.txt <== +chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 +chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 +chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 +chr4 995793 996435 FBtr0111046 0 + 7 166 642 +chr4 995793 997931 FBtr0111044 0 + 28 683 2138 +chr4 995793 997931 FBtr0111045 0 + 28 683 2138 +chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 + +==> BBB.txt <== +chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 +chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 +chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 +chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 +chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 +chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 +chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 +chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 +chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 + +==> CCC.txt <== +chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 +chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 +chr4 995793 996435 FBtr0111046 0 + 5 304 642 +chr4 995793 997931 FBtr0111044 0 + 17 714 2138 +chr4 995793 997931 FBtr0111045 0 + 17 714 2138 +chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 + +\$ multijoin -h --key 4 --values 7,8,9 *.txt | head -n 10 +key AAA__V7 
AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 +FBtr0089116 0 0 0 56 1296 15144 0 0 0 +FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 +FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0111044 28 683 2138 0 0 0 17 714 2138 +FBtr0111045 28 683 2138 0 0 0 17 714 2138 +FBtr0111046 7 166 642 0 0 0 5 304 642 +FBtr0300796 0 0 0 56 1296 14475 0 0 0 + + + +EOF + exit(0); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multijoin.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,133 @@ +<tool id="tp_multijoin_tool" name="Multi-Join" version="@BASE_VERSION@.0"> + <description>(combine multiple files)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="5.18.1">perl</requirement> + <requirement type="package" version="1.0">text_processing_perl_packages</requirement> + </expand> <!-- Runs the multijoin Perl script; every dataset path in the command is single-quoted, including the first file, so all of them reach the script as one shell word each. --> + <command interpreter="perl"> +<![CDATA[ + multijoin + --key '$key_column' + --values '$value_columns' + --filler '$filler' + $ignore_dups + $output_header + $input_header + '$first_file' + #for $file in $files: + '$file' + #end for + > '$outfile' +]]> + </command> + <inputs> + <param name="first_file" type="data" format="txt" label="File to join"/> + <param name="files" multiple="True" type="data" format="txt" label="add additional file" /> + + <param name="key_column" label="Common key column" type="integer" + value="1" help="Usually gene-ID or other common value" /> + + <param name="value_columns" label="Column with values to preserve" + type="data_column" data_ref="first_file" accept_default="true" multiple="True" display="checkboxes"/> + + <param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" /> + <param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" /> + <param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." 
/> + <param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="outfile" format_source="first_file" metadata_source="first_file" /> + </outputs> + <tests> + <test> + <param name="first_file" value="multijoin1.txt" /> + <param name="files" value="multijoin2.txt,multijoin3.txt" /> + <param name="key_column" value="4" /> + <param name="value_columns" value="c7,c8,c9" /> + <param name="output_header" value="True" /> + <output name="outfile" file="multijoin_result1.txt" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool joins multiple tabular files based on a common key column. + +----- + +**Example** + +To join three files, based on the 4th column, and keeping the 7th,8th,9th columns: + +**First file (AAA)**:: + + chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 + chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 + chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 + chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 + chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 + chr4 995793 996435 FBtr0111046 0 + 7 166 642 + chr4 995793 997931 FBtr0111044 0 + 28 683 2138 + chr4 995793 997931 FBtr0111045 0 + 28 683 2138 + chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 + ... + + +**Second file (BBB)**:: + + chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 + chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 + chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 + chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 + chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 + chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 + chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 + chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 + chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 + ... 
+ +**Third file (CCC)**:: + + chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 + chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 + chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 + chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 + chr4 995793 996435 FBtr0111046 0 + 5 304 642 + chr4 995793 997931 FBtr0111044 0 + 17 714 2138 + chr4 995793 997931 FBtr0111045 0 + 17 714 2138 + chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 + ... + + +**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return:: + + key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 + FBtr0089116 0 0 0 56 1296 15144 0 0 0 + FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 + FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 + FBtr0111044 28 683 2138 0 0 0 17 714 2138 + FBtr0111045 28 683 2138 0 0 0 17 714 2138 + FBtr0111046 7 166 642 0 0 0 5 304 642 + FBtr0300796 0 0 0 56 1296 14475 0 0 0 + ... + +.. class:: infomark + +Input files need not be sorted. + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,124 @@ +Galaxy wrappers for common unix text-processing tools +===================================================== + +The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu ) +in Cold Spring Harbor Laboratory ( http://www.cshl.edu ). In late 2013 maintenance and +further development were taken over by Bjoern Gruening. Feel free to contribute any general purpose +text manipulation tool to this repository. + + +Tools: +------ + + * awk - The AWK programming language ( http://www.gnu.org/software/gawk/ ) + * sed - Stream Editor ( http://sed.sf.net ) + * grep - Search files ( http://www.gnu.org/software/grep/ ) + * sort_columns - Sorting every line according to their columns + * GNU Coreutils programs ( http://www.gnu.org/software/coreutils/ ): + + * sort - sort files + * join - join two files, based on common key field. + * cut - keep/discard fields from a file + * unsorted_uniq - keep unique/duplicated lines in a file + * sorted_uniq - keep unique/duplicated lines in a file + * head - keep the first X lines in a file. + * tail - keep the last X lines in a file. + * unfold_column - unfold a column with multiple entities into multiple lines + + +Few improvements over the standard tools: +----------------------------------------- + + * EasyJoin - A Join tool that does not require pre-sorting the files ( https://github.com/agordon/filo/blob/scripts/src/scripts/easyjoin ) + * Multi-Join - Join multiple (>2) files ( https://github.com/agordon/filo/blob/scripts/src/scripts/multijoin ) + * Find_and_Replace - Find/Replace text in a line or specific column. + * Grep with Perl syntax - uses grep with Perl-Compatible regular expressions. 
+ * HTML'd Grep - grep text in a file, and produce highlighted HTML output, for easier viewing ( uses https://github.com/agordon/filo/blob/scripts/src/scripts/sort-header ) + + +Requirements: +------------- + + * Coreutils version 8.22 or later. + * AWK version 4.0.1 or later. + * SED version 4.2 *with* a special patch + * Grep with PCRE support + +All dependencies will be installed automatically with the Galaxy `Tool Shed`_ and the following repository: https://toolshed.g2.bx.psu.edu/view/bgruening/text_processing + + +------------------- +NOTE About Security +------------------- + +The included tools are secure (barring unintentional bugs): +The main concern might be executing system commands with awk's "system" and sed's "e" commands, +or reading/writing arbitrary files with awk's redirection and sed's "r/w" commands. +These commands are DISABLED using the "--sandbox" parameter to awk and sed. + +A user trying to run an awk program similar to:: + + BEGIN { system("ls") } + +will get an error (in Galaxy) saying:: + + fatal: 'system' function not allowed in sandbox mode. + +A user trying to run a SED program similar to:: + + 1els + +will get an error (in Galaxy) saying:: + + sed: -e expression #1, char 2: e/r/w commands disabled in sandbox mode + +That being said, if you do find some vulnerability in these tools, please let me know and I'll try to fix it. + +------------ +Installation +------------ + +Should be done via the Galaxy `Tool Shed`_. +Install the following repository: https://toolshed.g2.bx.psu.edu/view/bgruening/text_processing + +.. 
_`Tool Shed`: http://wiki.galaxyproject.org/Tool%20Shed + + +---- +TODO +---- + + * add shuf, we can remove the random feature from sort and use shuf instead + * move some advanced settings under a conditional, for example the cut tools offers to cut bytes + * cut wrapper has some output conditional magic for interval files, that needs to be checked + * comm wrapper, see the Galaxy default one + * evaluate the join wrappers against the Galaxy ones, maybe we should drop them + + +------- +License +------- + + * Copyright (c) 2009-2013 A. Gordon (gordon <at> cshl dot edu) + * Copyright (c) 2013-2015 B. Gruening (bjoern dot gruening <at> gmail dot com) + + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/recurring_lines.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,78 @@ +<tool id="tp_text_file_with_recurring_lines" name="Create text file" version="@BASE_VERSION@.0"> + <description>with recurring lines</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> <!-- For each token_set entry the command appends the given line to the output N times; N is either the user-supplied number or the line count of the template file. Paths and the line text are single-quoted so the shell passes them through as single words. --> + <version_command>yes --version | head -n 1</version_command> + <command> +<![CDATA[ + #for $token in $token_set: + #if str($token.repeat_select.repeat_select_opts) == 'user': + times=#echo $token.repeat_select.times#; + #else: + times=`wc -l '$token.repeat_select.infile' | awk '{print $1}'`; + #end if + yes -- '${token.line}' 2>/dev/null | head -n "\$times" >> '$outfile'; + #end for +]]> + </command> + <inputs> + <repeat name="token_set" title=" selection" min="1"> + <param name="line" type="text" size="30" + label="Characters to insert" help="Specify the characters that will be inserted X times in every line"/> + <conditional name="repeat_select"> + <param name="repeat_select_opts" type="select" label="Specify the number of iterations by"> + <option value="file">File (for each line in file)</option> + <option value="user" selected="True">User defined number</option> + </param> + <when value="user"> + <param name="times" size="10" type="integer" value="10" min="1" label="How many times?"/> + </when> + <when value="file"> + <param name="infile" type="data" format="txt" label="Template file" + help="For every line, the specified characters will be written once. 
That means X is the number of lines in the given file."/> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <data format="txt" name="outfile"/> + </outputs> + <tests> + <test> + <repeat name="token_set"> + <param name="line" value="freedom" /> + <param name="repeat_select_opts" value="file" /> + <param name="infile" value="multijoin2.txt" /> + </repeat> + <output name="outfile" file="recurring_result1.txt" /> + </test> + <test> + <repeat name="token_set"> + <param name="line" value="freedom" /> + <param name="repeat_select_opts" value="user" /> + <param name="times" value="10" /> + </repeat> + <repeat name="token_set"> + <param name="line" value="war is over" /> + <param name="repeat_select_opts" value="user" /> + <param name="times" value="10" /> + </repeat> + <output name="outfile" file="recurring_result2.txt" /> + </test> + </tests> + <help> +<![CDATA[ +.. class:: infomark + +**What it does** + +This tool creates a text file with recurring lines. You can specify a bunch of characters or entire sentences. +The entire string will be printed X times separated by a line break. X can be either given by the user as a number or calculated from a given file. +In case the user provides a file, the number of lines in that file will be used as X. + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_column.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,135 @@ +<tool id="tp_replace_in_column" name="Replace Text" version="@BASE_VERSION@.0"> + <description>in a specific column</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="4.1.0">gnu_awk</requirement> + </expand> + <version_command>awk --version | head -n 1</version_command> <!-- The find and replace patterns are interpolated into a single-quoted awk program that calls gensub on the selected column; the sanitizers below strip single quotes from both patterns, and awk runs in sandbox mode. --> + <command> +<![CDATA[ + awk + -v OFS=" " + --re-interval + --sandbox '{ \$$column = gensub( /$find_pattern/, "$replace_pattern", "g", \$$column ) ; print \$0 ; }' + "$infile" + > "$outfile" +]]> + </command> + <inputs> + <param format="tabular" name="infile" type="data" label="File to process" /> + <param name="column" label="in column" type="data_column" data_ref="infile" accept_default="true" /> + + <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param name="replace_pattern" type="text" size="20" label="Replace with" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile" /> + </outputs> + <tests> + <test> + <param name="infile" value="replace_text_in_column1.txt" ftype="tabular" /> + <param name="column" value="4" /> + <param name="find_pattern" value=".+_(R.)" /> + <param name="replace_pattern" value="\\1" /> + <output name="outfile" file="replace_text_in_column_results1.txt" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool performs find & replace operation on a specified column in a given file. 
+ +.. class:: infomark + +The **pattern to find** uses the **extended regular** expression syntax (same as running 'awk --re-interval'). + +.. class:: infomark + +**TIP:** If you need more complex patterns, use the *awk* tool. + +----- + + +**Examples of Find Patterns** + +- **HELLO** The word 'HELLO' (case sensitive). +- **AG.T** The letters A,G followed by any single character, followed by the letter T. +- **A{4,}** Four or more consecutive A's. +- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. +- **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern. + + +**Examples of Replace Patterns** + +- **WORLD** The word 'WORLD' will be placed wherever the find pattern was found. +- **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&** (ampersand) represents the matched find pattern. +- **\\1** The text which matched the first parenthesis in the Find Pattern. + + +----- + +**Example 1** + +**Find Pattern:** HELLO +**Replace Pattern:** WORLD + +Every time the word HELLO is found, it will be replaced with the word WORLD. This operation affects only the selected column. + +----- + +**Example 2** + +**Find Pattern:** ^(.{4}) +**Replace Pattern:** &\\t + +Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns. This operation affects only the selected column. + + +----- + +**Extended Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. 
**\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string (but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item is matched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meanings: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + + +**Note**: AWK uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_line.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,132 @@ +<tool id="tp_replace_in_line" name="Replace Text" version="@BASE_VERSION@.0"> + <description>in entire line</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> + </expand> + <version_command>sed --version | head -n 1</version_command> <!-- The sed expression is single-quoted and the sanitizers below strip single quotes from both patterns, so user input cannot terminate the shell word or trigger shell expansion. As a consequence backslashes reach sed unchanged: a back-reference is typed with one backslash (\1). --> + <command> +<![CDATA[ + sed + -r + --sandbox + 's/$find_pattern/$replace_pattern/g' + "$infile" + > "$outfile" +]]> + </command> + <inputs> + <param format="txt" name="infile" type="data" label="File to process" /> + <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without the enclosing slashes // ) " > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or & (ampersand) and \1 \2 \3 to refer to matched text. See examples below." > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile"/> + </outputs> + <tests> + <test> + <param name="infile" value="replace_text_in_line1.txt" /> + <param name="find_pattern" value="CTC." /> + <param name="replace_pattern" value="FOOBAR" /> + <output name="outfile" file="replace_text_in_line_results1.txt" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool performs find & replace operation on a specified file. + +.. class:: infomark + +The **pattern to find** uses the **extended regular** expression syntax (same as running 'sed -r'). + +.. class:: infomark + +**TIP:** If you need more complex patterns, use the *sed* tool. 
+ +----- + +**Examples of Find Patterns** + +- **HELLO** The word 'HELLO' (case sensitive). +- **AG.T** The letters A,G followed by any single character, followed by the letter T. +- **A{4,}** Four or more consecutive A's. +- **chr2[012]\\t** The words 'chr20' or 'chr21' or 'chr22' followed by a tab character. +- **hsa-mir-([^ ]+)** The text 'hsa-mir-' followed by one-or-more non-space characters. When using parentheses, the matched content of the parentheses can be accessed with **\\1** in the **replace** pattern. + + +**Examples of Replace Patterns** + +- **WORLD** The word 'WORLD' will be placed wherever the find pattern was found. +- **FOO-&-BAR** Each time the find pattern is found, it will be surrounded with 'FOO-' at the beginning and '-BAR' at the end. **&** (ampersand) represents the matched find pattern. +- **\\1** The text which matched the first parenthesis in the Find Pattern. + + +----- + +**Example 1** + +**Find Pattern:** HELLO +**Replace Pattern:** WORLD + +Every time the word HELLO is found, it will be replaced with the word WORLD. + + +----- + +**Example 2** + +**Find Pattern:** ^(.{4}) +**Replace Pattern:** &\\t + +Find the first four characters in each line, and replace them with the same text, followed by a tab character. In practice - this will split the first line into two columns. + + +----- + +**Extended Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string (but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. 
+ - **{n,}** The preceding item is matched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meanings: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + + +**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sed.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,133 @@ +<tool id="tp_sed_tool" name="Text transformation" version="@BASE_VERSION@.0"> + <description>with sed</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"> + <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement> + </expand> + <version_command>sed --version | head -n 1</version_command> + <command> +<![CDATA[ + sed + --sandbox + -r + #if $adv_opts.adv_opts_selector == 'advanced': + $adv_opts.silent + #end if + -f '$sed_script' + '$infile' + > '$output' +]]> + </command> + <inputs> + <param format="txt" name="infile" type="data" label="File to process" /> + <param name="code" type="text" area="true" size="5x35" label="SED Program" help=""> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic" /> + <when value="advanced"> + <param name="silent" type="select" label="Operation mode" help="Same as 'sed -n', leave at 'normal' unless you know what you're doing." 
> + <option value="">normal</option> + <option value="-n">silent</option> + </param> + </when> + </conditional> + </inputs> + <configfiles> + <configfile name="sed_script"> + $code + </configfile> + </configfiles> + <outputs> + <data name="output" format_source="infile" metadata_source="infile" /> + </outputs> + <tests> + <test> + <param name="infile" value="sed1.txt" /> + <param name="code" value="1d ; s/foo/bar/" /> + <param name="silent" value="" /> + <output name="output" file="sed_results1.txt" /> + </test> + <test> + <param name="infile" value="sed1.txt" /> + <param name="code" value="/foo/ { s/foo/baz/g ; p }" /> + <param name="adv_opts_selector" value="advanced" /> + <param name="silent" value="-n" /> + <output name="output" file="sed_results2.txt" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool runs the unix **sed** command on the selected data file. + +.. class:: infomark + +**TIP:** This tool uses the **extended regular** expression syntax (same as running 'sed -r'). + + + +**Further reading** + +- Short sed tutorial (http://www.linuxhowtos.org/System/sed_tutorial.htm) +- Long sed tutorial (http://www.grymoire.com/Unix/Sed.html) +- sed faq with good examples (http://sed.sourceforge.net/sedfaq.html) +- sed cheat-sheet (http://www.catonmat.net/download/sed.stream.editor.cheat.sheet.pdf) +- Collection of useful sed one-liners (http://student.northpark.edu/pemente/sed/sed1line.txt) + +----- + +**Sed commands** + +The most useful sed command is **s** (substitute). + +**Examples** + +- **s/hsa//** will remove the first instance of 'hsa' in every line. +- **s/hsa//g** will remove all instances (because of the **g**) of 'hsa' in every line. +- **s/A{4,}/--&--/g** will find sequences of 4 or more consecutive A's, and once found, will surround them with two dashes from each side. The **&** marker is a placeholder for 'whatever matched the regular expression'. 
+- **s/hsa-mir-([^ ]+)/short name: \\1 full name: &/** will find strings such as 'hsa-mir-43a' (the regular expression is 'hsa-mir-' followed by non-space characters) and will replace them with a string such as 'short name: 43a full name: hsa-mir-43a'. The **\\1** marker is a placeholder for 'whatever matched the first parenthesis' (similar to perl's **$1**). + + +**sed's Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern describing a certain amount of text. + +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string (but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item is matched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meanings: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. + + +**Note**: SED uses extended regular expression syntax, not Perl syntax. **\\d**, **\\w**, **\\s** etc. are **not** supported. 
+ +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort-header Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,281 @@
#!/usr/bin/env perl
##
## Sort-header - wrapper for GNU sort with header-line support
##
## Copyright(C) A. Gordon
## license AGPLv3+
##
## Overall flow (see "Program Start" below):
##   1. parse the command line, collecting pass-through options for 'sort';
##   2. re-open STDIN/STDOUT onto the input/output files, if given;
##   3. copy the first N header lines from STDIN to STDOUT unchanged;
##   4. run 'sort' with the collected options and no file argument
##      (so it works on the inherited STDIN/STDOUT);
##   5. exit with the exit code of 'sort'.
##
use strict;
use warnings;
use Data::Dumper;
use IO::Handle;
use Getopt::Long qw(:config bundling no_ignore_case_always);

## Forward declarations
sub add_standard_sort_param(@);
sub add_standard_sort_param_value(@);
sub forbidden_sort_param(@);
sub show_help();
sub show_version();
sub show_examples();
sub parse_commandline_options();
sub reassign_input_output();
sub process_header_lines();
sub run_sort();
sub read_line_non_buffered();


##
## Runtime options
##
my $PROGRAM="sort-header";
my $VERSION=0.4;

my $check_only=undef;
my $input_file=undef;
my $output_file=undef;
my $field_separator=undef;
my $header_lines =1 ;
my $debug=undef;
my $sort_exit_code=1; #by default, assume some error

## Options (and their values) that are handed to 'sort' verbatim.
my @sort_options;

##
## Program Start
##
parse_commandline_options();
reassign_input_output();
process_header_lines();
run_sort();
exit($sort_exit_code);
##
## Program End
##

## Print usage examples to STDOUT and exit successfully.
sub show_examples()
{
print<<EOF;
Sorting a file with a header line:

\$ cat input.txt
Fruit Color Price
Banana Yellow 4.1
Avocado Green 8.0
Apple Red 3.0
Melon Green 6.1

# By default, 'sort-header' assumes 1 header line
# (no need to use --header in this case).

\$ sort-header -k3,3nr input.txt
Fruit Color Price
Avocado Green 8.0
Melon Green 6.1
Banana Yellow 4.1
Apple Red 3.0

EOF
	exit(0);
}

## Print the help screen to STDOUT and exit successfully.
sub show_help()
{
print<<EOF;
${PROGRAM}: Wrapper for GNU sort, allowing sorting files with header lines.

Usage: $PROGRAM [HEADER-OPTIONS] [GNU sort Options] [INPUT-FILE]

HEADER-OPTIONS: the following options are supported by '${PROGRAM}':

  --header N    = Treat the first N lines as header lines.
                  These line will NOT be sorted. They will be passed
                  directly to the output file. (default: 1)

  --version     = Print ${PROGRAM}'s version.

  --debugheader = Print debug messages (relating to ${PROGRAM}'s operation).

  --help        = Show this help screen.

  --examples    = Show usage examples.

GNU sort options:
  Most of the standard GNU sort options are supported and passed to GNU sort.
  The following options can not be used with '${PROGRAM}':

  -m --merge           => ${PROGRAM} can only sort one file, not merge multiple files.
  -c -C --check        => Currently not supported
  --files0-from        => Currently not supported
  -z --zero-terminated => Currently not supported

INPUT-FILE:
  If INPUT-FILE is not specified, $PROGRAM will use STDIN (just like GNU sort).

EOF
	exit(0);
}

## Print this wrapper's version to STDOUT and exit successfully.
sub show_version()
{
print<<EOF;
$PROGRAM $VERSION
Copyright (C) 2010 A. Gordon (gordon\@cshl.edu)
License AGPLv3+: Affero GPL version 3 or later (http://www.gnu.org/licenses/agpl.html)

To see the GNU's sort version, run:
	sort --version
EOF
	exit(0);
}

##
## Parse \@ARGV.
##
## Pass-through options are appended to \@sort_options by the callbacks
## below; unsupported ones abort via forbidden_sort_param(). The wrapper's
## own options set \$output_file, \$header_lines and \$debug. At most one
## remaining positional argument is accepted and stored in \$input_file.
## Exits with status 1 when GetOptions reports a parsing error.
##
sub parse_commandline_options()
{
	my $rc = GetOptions(
		"ignore-leading-blanks|b" => \&add_standard_sort_param,
		"dictionary-order|d" => \&add_standard_sort_param,
		"ignore-case|f" => \&add_standard_sort_param,
		"general-numeric-sort|g" => \&add_standard_sort_param,
		"ignore-nonprinting|i" => \&add_standard_sort_param,
		"month-sort|M" => \&add_standard_sort_param,
		"human-numeric-sort|h" => \&add_standard_sort_param,
		"numeric-sort|n" => \&add_standard_sort_param,
		"random-source=s" => \&add_standard_sort_param_value,
		"random-sort|R" => \&add_standard_sort_param,
		"reverse|r" => \&add_standard_sort_param,
		"sort=s" => \&add_standard_sort_param_value,
		"version-sort|V" => \&add_standard_sort_param,

		"check|c" => \&forbidden_sort_param,
		"C" => \&forbidden_sort_param,
		"compress-program=s" => \&add_standard_sort_param_value,
		"debug" => \&add_standard_sort_param,

		"files0-from=s" => \&forbidden_sort_param,

		"key|k=s" => \&add_standard_sort_param_value,
		"merge|m" => \&forbidden_sort_param,
		"batch-size=i" => \&forbidden_sort_param,

		"parallel=i" => \&add_standard_sort_param_value,

		"output|o=s" => \$output_file,

		"stable|s" => \&add_standard_sort_param,
		"buffer-size|S=s" => \&add_standard_sort_param_value,

		"field-separator|t=s" => \&add_standard_sort_param_value,
		"temporary-directory|T=s" => \&add_standard_sort_param_value,
		"unique|u" => \&add_standard_sort_param,

		"zero-terminated|z" => \&forbidden_sort_param,

		"help" => \&show_help,
		"version" => \&show_version,
		"examples" => \&show_examples,

		"header=i" => \$header_lines,
		"debugheader" => \$debug,
	);

	exit 1 unless $rc;

	my @INPUT_FILES = @ARGV;

	die "$PROGRAM: error: invalid number of header lines ($header_lines)\n" unless $header_lines>=0;
	die "$PROGRAM: error: Multiple input files specified. This program can sort only a signle file.\n" if (scalar(@INPUT_FILES)>1);
	$input_file = shift @INPUT_FILES if scalar(@INPUT_FILES)==1;

	if ($debug) {
		warn "$PROGRAM: number of header lines = $header_lines\n";
		warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n";
	}
}

##
## Re-open STDOUT onto \$output_file and STDIN onto \$input_file when they
## were given, so both this script and the 'sort' child use the same
## descriptors. Dies with a message if a file cannot be opened.
##
sub reassign_input_output()
{
	if ($output_file) {
		warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug;
		open OUTPUT, '>', $output_file or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n";
		STDOUT->fdopen(\*OUTPUT, 'w') or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n";
	}


	if ($input_file) {
		warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug;
		open INPUT, '<', $input_file or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n";
		STDIN->fdopen(\*INPUT, 'r') or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n";
	}
}

##
## Copy the first \$header_lines lines from STDIN to STDOUT unchanged.
## If the input ends before all header lines were read, the script exits
## right away (there is nothing left to sort).
##
sub process_header_lines()
{
	warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug;
	for (my $i=0; $i<$header_lines; $i++) {
		my $line = read_line_non_buffered();
		exit unless defined $line;
		print $line;
	}
}

##
## Run 'sort' with the collected pass-through options and translate its
## termination status:
##   - failure to execute, or death by signal  => die with a message
##     (SIGINT is re-sent to this process first);
##   - normal termination                      => store the exit code in
##     \$sort_exit_code.
##
sub run_sort()
{
	warn "$PROGRAM: Running GNU sort...\n" if $debug;

	# Make sure the header lines reach the output before anything the
	# child writes to the same descriptor.
	STDOUT->flush();

	system('sort', @sort_options);
	if ($? == -1) {
		die "$PROGRAM: Error: failed to execute 'sort': $!\n";
	}
	elsif ($? & 127) {
		my $signal = ($? & 127);
		kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide
		die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n";
	}
	else {
		$sort_exit_code = ($? >> 8);
	}
}


## GetOptions callback for flag-style options: forward the option to
## 'sort' without a value.
sub add_standard_sort_param(@)
{
	my ($obj)= @_;
	add_standard_sort_param_value($obj, undef);
}

##
## GetOptions callback for options that take a value: forward the option
## (as "-x" for one-letter names, "--name" otherwise) followed by its
## value, as two separate arguments.
##
sub add_standard_sort_param_value(@)
{
	my ($obj,$value)= @_;

	my $option = "" . $obj ; #stringify the option object, get the option name.

	if (length($option)==1) {
		$option = "-" . $option ;
	} else {
		$option = "--" . $option ;
	}
	push @sort_options, $option ;
	# Test for definedness, not truth: a value of "0" (e.g. '-t 0' or
	# '--parallel 0') is legitimate and must not be dropped.
	push @sort_options, $value if defined $value;
}

## GetOptions callback for options this wrapper refuses to forward.
sub forbidden_sort_param(@)
{
	my ($obj,$value)= @_;
	my $option = "" . $obj ; #stringify the option object, get the option name.

	die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n";
}

##
## Read one line from STDIN one byte at a time using sysread, so no bytes
## beyond the returned line are consumed from the descriptor.
##
## Returns the line (including its trailing newline, if present), or
## undef when end-of-input is reached before any byte was read.
## Dies on a read error.
##
sub read_line_non_buffered()
{
	my $line = '';
	while ( 1 ) {
		my $c;
		my $rc = sysread STDIN, $c, 1;
		die "$PROGRAM: STDIN Read error: $!" unless defined $rc;
		if ($rc==0) {
			# End of input. Compare by length: a final unterminated line
			# consisting of "0" is false in boolean context but is still data.
			return length($line) ? $line : undef;
		}
		$line .= $c ;
		return $line if ( $c eq "\n");
	}
}

--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,176 @@
<tool id="tp_sort_header_tool" name="Sort" version="@BASE_VERSION@.0">
    <description>data in ascending or descending order</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
    </expand>
    <version_command>sort --version | head -n 1</version_command>
    <!--
        The whole pipeline runs in a subshell that reads the input dataset on
        stdin and writes the output dataset on stdout. When header lines are
        requested, 'sed -u Nq' prints the first N lines and quits; 'sort' then
        continues on the same stdin.
    -->
    <command>
<![CDATA[
    (
        ## NOTE(review): depending on how line breaks in this template are
        ## joined, LC_ALL=C may apply only to the command that follows it;
        ## confirm that sort really runs in the C locale when headers are used.
        LC_ALL=C
        #if int($header) > 0:
            sed -u '${header}'q &&
        #end if

        ## The -t argument is expected to be a literal TAB (the input is
        ## tabular) - verify it survived copy/paste.
        sort $unique $ignore_case --stable -t '	'

        #for $key in $sortkeys:
            -k '${key.column}${key.order}${key.style},${key.column}'
        #end for

    ) < '${infile}' > '${outfile}'
]]>
    </command>
    <inputs>
        <param format="tabular" name="infile" type="data" label="Sort Query" />
        <param name="header" type="integer" size="5" value="0"
            label="Number of header lines" help="These will be ignored during sort.">
            <validator type="in_range" message="Negative values are not allowed." min="0"/>
        </param>

        <!-- Each key becomes one '-k COL[r][style],COL' argument. -->
        <repeat name="sortkeys" title="Column selections" min="1">
            <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
            <param name="order" type="select" display="radio" label="in">
                <option value="">Ascending order</option>
                <option value="r">Descending order</option>
            </param>
            <param name="style" type="select" display="radio" label="Flavor">
                <option value="n">Fast numeric sort (-n)</option>
                <option value="g">General numeric sort ( scientific notation -g)</option>
                <option value="V">Natural/Version sort (-V) </option>
                <option value="">Alphabetical sort</option>
                <option value="h">Human-readable numbers (-h)</option>
                <option value="R">Random order (-R)</option>
            </param>
        </repeat>

        <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue=""
            label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" />
        <!-- GNU sort: -f is ignore-case; -i would be ignore-nonprinting. -->
        <param name="ignore_case" type="boolean" checked="false" truevalue="-f" falsevalue=""
            label="Ignore case" help="Sort key column values regardless of upper/lower case letters. (-f)" />
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="sort1.bed"/>
            <param name="header" value="3"/>
            <repeat name="sortkeys">
                <param name="column" value="1"/>
                <param name="style" value=""/>
                <param name="order" value=""/>
            </repeat>
            <repeat name="sortkeys">
                <param name="column" value="3"/>
                <param name="style" value="n"/>
                <param name="order" value="r"/>
            </repeat>
            <output name="outfile" file="sort_result1.bed"/>
        </test>
        <test>
            <param name="infile" value="sort1.bed"/>
            <param name="header" value="3"/>
            <repeat name="sortkeys">
                <param name="column" value="1"/>
                <param name="style" value=""/>
                <param name="order" value=""/>
            </repeat>
            <repeat name="sortkeys">
                <param name="column" value="3"/>
                <param name="style" value="n"/>
                <param name="order" value=""/>
            </repeat>
            <output name="outfile" file="sort_result2.bed"/>
        </test>
        <test>
            <param name="infile" value="sort2.bed"/>
            <repeat name="sortkeys">
                <param name="column" value="5"/>
                <param name="style" value="g"/>
                <param name="order" value=""/>
            </repeat>
            <output name="outfile" file="sort_result3.bed"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool sorts an input file.

-----

**Sorting Styles**

* **Fast Numeric**: sort by numeric values. Handles integer values (e.g. 43, 134) and decimal-point values (e.g. 3.14). *Does not* handle scientific notation (e.g. -2.32e2).
* **General Numeric**: sort by numeric values. Handles all numeric notations (including scientific notation). Slower than *fast numeric*, so use only when necessary.
* **Natural Sort**: Sort in 'natural' order (natural to humans, not to computers). See example below.
* **Alphabetical sort**: Sort in strict alphabetical order. See example below.
* **Human-readable numbers**: Sort human readable numbers (e.g. 1G > 2M > 3K > 400)
* **Random order**: return lines in random order.

------

**Example - Header line**

**Input file** (note first line is a header line, should not be sorted)::

    Fruit   Color   Price
    Banana  Yellow  4.1
    Avocado Green   8.0
    Apple   Red     3.0
    Melon   Green   6.1

**Sorting** by **numeric order** on column **3**, with **header**, will return::

    Fruit   Color   Price
    Apple   Red     3.0
    Banana  Yellow  4.1
    Melon   Green   6.1
    Avocado Green   8.0


-----

**Example - Natural vs. Alphabetical sorting**

Given the following list::

    chr4
    chr13
    chr1
    chr10
    chr20
    chr2

**Alphabetical sort** would produce the following sorted list::

    chr1
    chr10
    chr13
    chr2
    chr20
    chr4

**Natural Sort** would produce the following sorted list::

    chr1
    chr2
    chr4
    chr10
    chr13
    chr20


.. class:: infomark

If you're planning to use the file with another tool that expects sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs.

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort_rows.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@
<tool id="tp_sort_rows" name="Sort a row" version="@BASE_VERSION@.0">
    <description>according to their columns</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!--
        For every input line: strip surrounding whitespace, split on TAB,
        sort the fields and join them with TAB again.
        print(line) with a single argument behaves the same under Python 2
        and Python 3, whereas the statement form 'print line' is a syntax
        error on Python 3.
    -->
    <command>
<![CDATA[
    python -c 'for line in ( "\t".join(sorted(line.strip().split("\t"))) for line in open("$infile") ): print(line)' > '$outfile'
]]>
    </command>
    <inputs>
        <param format="tabular" name="infile" type="data" label="Tabular file that should be sorted"/>
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile"/>
    </outputs>
    <options sanitize="False"/>
    <tests>
        <test>
            <param name="infile" value="sort_rows1.tabular" ftype="tabular" />
            <output name="outfile" file="sort_rows_results1.bed"/>
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert*

**What it does**

This tool sorts each row in a TAB separated file, according to their columns. In other words: It is a sorted reordering of all columns.

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sorted_uniq.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,105 @@
<tool id="tp_uniq_tool" name="Unique lines" version="@BASE_VERSION@.0">
    <description>assuming sorted input file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
    </expand>
    <version_command>uniq --version | head -n 1</version_command>
    <command>
<![CDATA[
    uniq
        #if $skipfields:
            -f $skipfields
        #end if
        $ignorecase

        #if $grouping.grouping_select == 'yes':
            ## "No grouping at all" has an empty value; emitting a bare
            ## '--group=' would make uniq fail on the empty method name,
            ## so only pass the flag when a method was selected.
            #if str($grouping.group):
                --group=$grouping.group
            #end if
        #else:
            $grouping.count
            $grouping.repeated
            $grouping.uniqueonly
        #end if

        "$infile"

        #if $grouping.grouping_select == 'no' and $grouping.count:
            ## count will print the count with spaces in front of the line and
            ## with a space (not a tab) after the number, we need to change that
            | sed -e 's/ *//' -e 's/ /\t/'
        #end if
        > "$outfile"
]]>
    </command>
    <inputs>
        <param name="infile" format="tabular" type="data"
            label="File to scan for unique values" help="Make sure you have sorted this file" />

        <!-- The grouping mode and the -c/-d/-u flags are offered as alternatives. -->
        <conditional name="grouping">
            <param name="grouping_select" type="select" label="Do you want to group each unique group?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no">
                <param name="count" type="boolean" truevalue="-c" falsevalue=""
                    label="Counting number of occurrences" help="Prefix lines by the number of occurrences. (-c)" />
                <param name="repeated" type="boolean" truevalue="-d" falsevalue=""
                    label="Only print duplicate lines" help="(-d)"/>
                <param name="uniqueonly" type="boolean" checked="True" truevalue="-u" falsevalue=""
                    label="Only print unique lines" help="(-u)" />
            </when>
            <when value="yes">
                <param name="group" type="select"
                    label="Output all lines, and delimit each unique group" help="(--group)">
                    <option value="">No grouping at all</option>
                    <option value="separate">Separate unique groups with a single delimiter</option>
                    <option value="prepend">Output a delimiter before each group of unique items</option>
                    <option value="append">Output a delimiter after each group of unique items</option>
                    <option value="both">Output a delimiter around each group of unique items</option>
                </param>
            </when>
        </conditional>

        <param name="ignorecase" type="boolean" truevalue="-i" falsevalue=""
            label="Ignore differences in case when comparing" help="(-i)"/>
        <param name="skipfields" type="integer" size="2" value="0"
            label="Avoid comparing the first N fields" help="Use zero to start from the first field. (-f)" />
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="sorted_uniq1.tabular" />
            <param name="grouping_select" value="no"/>
            <param name="count" value="True"/>
            <param name="ignorecase" value="True"/>
            <param name="uniqueonly" value="True"/>
            <output name="outfile" file="sorted_uniq_results1.tabular" />
        </test>
        <test>
            <param name="infile" value="sorted_uniq1.tabular" />
            <param name="ignorecase" value="True"/>
            <param name="grouping_select" value="yes"/>
            <param name="group" value="separate"/>
            <output name="outfile" file="sorted_uniq_results2.tabular" />
        </test>
    </tests>
    <help>
<![CDATA[
This tool takes a sorted file and looks for lines that are unique.

.. class:: warningmark

Please make sure your file is sorted, or else this tool will give you an erroneous output.

.. class:: infomark

You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tac.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,90 @@
<tool id="tp_tac" name="tac" version="@BASE_VERSION@.0">
    <description>reverse a file (reverse cat)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>tac --version | head -n 1</version_command>
    <command>
<![CDATA[
    tac
        #if str($separator.separator_select) == "yes":
            $separator.before
            $separator.regex
            #if $separator.separator_string:
                ## The separator has to be given through the option; a bare
                ## positional argument would be taken as an input file name.
                --separator="$separator.separator_string"
            #end if
        #end if
        "$infile"
        > "$outfile"
]]>
    </command>
    <inputs>
        <param name="infile" type="data" format="txt" label="Input file"/>
        <conditional name="separator">
            <param name="separator_select" type="select" label="Do you want to use a separator other than newline?">
                <option value="no">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="before" type="boolean" truevalue="-b" falsevalue="" checked="True"
                    label="Attach the separator before instead of after" help="(--before)"/>
                <param name="regex" type="boolean" truevalue="-r" falsevalue="" checked="True"
                    label="Interpret the separator as a regular expression" help="(--regex)"/>
                <param name="separator_string" size="5" type="text" value=""
                    label="Separator to use" help="(--separator)" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile"/>
    </outputs>
    <tests>
        <test>
            <param name="infile" value="1.bed"/>
            <output name="outfile" file="tac_result1.txt"/>
        </test>
        <test>
            <param name="infile" value="1.bed"/>
            <param name="separator_select" value="yes"/>
            <param name="before" value="True"/>
            <output name="outfile" file="tac_result2.txt"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

tac is a Linux command that allows you to see a file line-by-line backwards. It is named by analogy with cat.

Mandatory arguments to long options are mandatory for short options too:

    -b, --before              attach the separator before instead of after
    -r, --regex               interpret the separator as a regular expression
    -s, --separator=STRING    use STRING as the separator instead of newline

-----

**Example**

Input file:

    0 1 2 3 4 5 # 6 7 8 9


default settings:

    9 8 7 6 # 5 4 3 2 1 0

with option -s 5:

    # 6 7 8 9 0 1 2 3 4 5

with option -b and -s 5:

    5 # 6 7 8 9 0 1 2 3 4

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tail.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,67 @@
<tool id="tp_tail_tool" name="Select last" version="@BASE_VERSION@.0">
    <description>lines from a dataset (tail)</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>tail --version | head -n 1</version_command>
    <!--
        $complement is either empty (keep the last N lines) or '+' (keep
        everything starting at line N); it is glued directly in front of the
        number to form the argument of the lines option.
    -->
    <command>
<![CDATA[
    tail
        --lines $complement$num_lines
        '$infile'
    > '$outfile'
]]>
    </command>
    <inputs>
        <param name="infile" format="txt" type="data" label="Text file" />
        <param name="complement" type="select" label="Operation">
            <option value="">Keep last lines</option>
            <option value="+">Keep everything from this line on</option>
        </param>
        <!-- A negative number combined with '+' would yield an argument such as '+-5', which tail rejects. -->
        <param name="num_lines" type="integer" size="5" value="10"
            label="Number of lines" help="These will be kept (depending on 'operation'). (--lines)">
            <validator type="in_range" message="Negative values are not allowed." min="0"/>
        </param>
    </inputs>
    <outputs>
        <data name="outfile" format_source="infile" metadata_source="infile" />
    </outputs>
    <tests>
        <test>
            <param name="infile" value="1.bed"/>
            <param name="num_lines" value="10"/>
            <output name="outfile" file="tail_results1.bed"/>
        </test>
        <test>
            <param name="infile" value="1.bed"/>
            <param name="num_lines" value="10"/>
            <param name="complement" value="+"/>
            <output name="outfile" file="tail_results2.bed"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

This tool outputs specified number of lines from the **end** of a dataset

-----

**Example**

- Input File::

    chr7    57134   57154   D17003_CTCF_R7  356     -
    chr7    57247   57267   D17003_CTCF_R4  207     +
    chr7    57314   57334   D17003_CTCF_R5  269     +
    chr7    57341   57361   D17003_CTCF_R7  375     +
    chr7    57457   57477   D17003_CTCF_R3  188     +

- Show last two lines of above file. The result is::

    chr7    57341   57361   D17003_CTCF_R7  375     +
    chr7    57457   57477   D17003_CTCF_R3  188     +

@REFERENCES@
]]>
    </help>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 
+ +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 
128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr10 0.4 +chr1 1.4 +chrM 3e-1 +chr2 1.1e2 +chr15 3.14e-2 +chr15 0.0314 +chr4 0.1 +chr20 0.9 +chr22 +1.3 +chrX -0.3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +12.6 chr1 +990 chr2 +8.1 chr20 +11.7 chr22
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit color weight price +apple red 1.4 0.4 +orange orange 1.1 0.2 +banana yellow 0.9 0.35
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit weight price +apple 1.4 0.4 +orange 1.1 0.2 +banana 0.9 0.35
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit weight price +apple 1.4 0.4 +orange 1.1 0.2 +banana 0.9 0.35
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results3.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fru +app +ora +ban
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Color +Apple red +Banana yellow +Orange orange +Melon green
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin2.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Price +Orange 7 +Avocado 8 +Apple 4 +Banana 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin_result1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +Fruit Color Price +Apple red 4 +Avocado 0 8 +Banana yellow 3 +Melon green 0 +Orange orange 7
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +I have a dream that one day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. +I have a dream today!
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 foo chr1 gene +2 bar chr2 luther +3 baz chrMT gene1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +I have a dream that one great day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one great day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one great day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one great day live in a nation where they will not be judged by the color of their skin but by the content of their character. +I have a dream today!
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 foo 1 gene +2 bar 2 luther +3 baz MT gene1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,152 @@ +>FC0000042:5:1:220:1502 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:34:1398 +GATCTCAGTCCACCGCTGGGATTAACCTTGCCCCCC +>FC0000042:5:1:164:1396 +TATCTTATAGATATTTCCCTCTATACTAGTGACCCC +>FC0000042:5:1:333:925 +GAGCTTATAGCTTGTTATATACGTCAACCCCCCCCC +>FC0000042:5:1:204:1476 +GTACTTATATAGATACAAAATATGTATAGGATTGTC +>FC0000042:5:1:119:1511 +GATCTGCATGACCTGGGATTTGTTGGACCCCCCCCC +>FC0000042:5:1:202:1487 +CATGTATAGTCTCCAGTCTATACAACAACCCCCCCC +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:627:866 +AATATAGATATGGGACAAAACACATTTAGACCCCCC +>FC0000042:5:1:24:1357 +GATATAATATCAATATCAATCCACGCTTGTTCCCCC +>FC0000042:5:1:187:1492 +TATAGAAGCAGAAGAAACAACCTACTTTCACATGTT +>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:87:1299 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:206:1341 +GATATATAGCAGTGACCACCTCTAAGCCCCCCCCCC +>FC0000042:5:1:144:929 +GCCCTGGCATATTGTCAATATCTTTAAACCCCCCCC +>FC0000042:5:1:662:820 +TGTCTTTTCGATTTTTTTCTTTGCGTCACCCCCCCC +>FC0000042:5:1:53:1507 +GACCTCACTGTGGCATGAATCATACATTCCCCCCCC +>FC0000042:5:1:182:1502 +AATGCTTGGCAAAGCTCAACTTCGTTGCCCCCCCCC +>FC0000042:5:1:194:1423 +GATCCTATAGGTCTCGATTGGTCTTTTATTCTTTTT +>FC0000042:5:1:35:1444 +GCTATAGCACGGCATAGTGCGATACTAGTACCCCCC +>FC0000042:5:1:667:872 +GACTATAGGCGGAATGATAATGTCAAATAAGTAGTT +>FC0000042:5:1:147:1438 +GATCAAGGAGACTAGGGAGGTAGGAGTTACTCCCCC +>FC0000042:5:1:467:510 +GAACCACTATAGTGACATGGAACACGCGTGAACCCC +>FC0000042:5:1:1553:1707 +TATAGTTACCCTACTGGGCCGACGATTCCCTTACGA +>FC0000042:5:1:207:964 +AATCTATAGATTTTTCTATTATTGTGTCCTCACCCC +>FC0000042:5:1:169:1468 +GCTCTATAGTTCGAGTTACCAAACTCTTCCCCCCCC +>FC0000042:5:1:42:1465 +GCTCTTTAGGTTTGAACCTGTAGACTTGAGGGGCAT +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:221:1465 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT 
+>FC0000042:5:1:196:1450 +AATATAGTCTATCCAACAAGATGTAACCCCCCCCCC +>FC0000042:5:1:86:1413 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:453:514 +GATATCTTCGTTTTATATTGAAACTGGCCCCCCCCC +>FC0000042:5:1:150:1415 +TATAGGGCCCTGTATGGTTGCTTGACTAGGGGCTGC +>FC0000042:5:1:191:1475 +GATCCATCCCAATCTCTACGATTGAAAGCATCGGGA +>FC0000042:5:1:26:1407 +GTTATAGAGGCGGGAAGGTGAGAATGCCCCCCCCCC +>FC0000042:5:1:107:1407 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:388:780 +GATCTATAGCTTCTTTAGCTTGGAAACTGGTCAGCC +>FC0000042:5:1:223:1535 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:145:783 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:449:876 +GACCATCAATCAGGTGGAAAGCAGGGCCCCCCCCCC +>FC0000042:5:1:212:1325 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:194:1485 +GAACCGAATCCAACCTGTTTCATTCCTCAGATCCCC +>FC0000042:5:1:507:494 +GATCTTATAGAATTTTTGACAACATAAGTTACCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:633:480 +GAGCTGTGTGCATCTGTCCTGAGAGAGGCAAGATTT +>FC0000042:5:1:53:1443 +GTAATGTTATAGCTAGGATTTTGGAGTTTGGTCCTC +>FC0000042:5:1:45:915 +GTATAGCAGCCTAATAAGGAGCTGGGGACCCCCCCC +>FC0000042:5:1:39:1343 +GTTCTATTTTCGATAAAACTGAACCACCCCCCCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:187:1507 +GAACTAATCCTGATTTATACAACGGCTCCCCCCCCC +>FC0000042:5:1:91:1364 +AATTTATAGCCACTCTAATTCCGTTTGGTTCCCCCC +>FC0000042:5:1:1542:1751 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:146:886 +GATCTACGATGTACCTTACGCCTCCGAGCATCCCCC +>FC0000042:5:1:615:861 +GATCTACATTATAGATAATGAAGTTCCATTTCCCCC +>FC0000042:5:1:52:792 +GATGTGGTATAGAGAGCAATTCGTTGGTTTTGCCCC +>FC0000042:5:1:153:1433 +GGTCTTTCTATAGAACGGAACGATATATTTTTCCCC +>FC0000042:5:1:540:800 +GAGCGAAAGTGATAGATGGAGGACTATATCTGCCCC +>FC0000042:5:1:160:1344 +GGTGTACTATAGCTATTAAGTCCAATCATGATAATA +>FC0000042:5:1:544:413 +GATCTCTGGAAAATATAAACCGGTGACCCCCCCCCC +>FC0000042:5:1:579:895 +AGTCTCGAATCAATGTATTTCATCGTGGTAATCCCC +>FC0000042:5:1:468:495 +TATTGATGCTCCCTGCCTGAAAGATACCCCCCCCCC 
+>FC0000042:5:1:383:831 +CTTCATGAATCTACTGTTGGCGTTTATTTTATCTGG +>FC0000042:5:1:112:1416 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:37:1299 +GATCGTGAGCTCTGTACCGGAAGTTCGTGGCTGCCA +>FC0000042:5:1:205:780 +TATAGTGTTCCACAAAGACTAGGTAACGCTTCATTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC +>FC0000042:5:1:54:1019 +AATCGCAGCATTCTGACACACAGGTTTCGGATGTAC +>FC0000042:5:1:587:867 +TATCTAATGTCATATTTTCAGACAAATTACTAGAAA +>FC0000042:5:1:319:990 +GATTTGTAAATTACTTCGAACATAGAAGTTCCCCCC +>FC0000042:5:1:453:829 +GAACTTACGGCATTAAGTTTAATCTTCAGCCACCCC +>FC0000042:5:1:159:1470 +GATCTGATAGTGTTGCGACGTAAATAAGTCCCCCCC +>FC0000042:5:1:487:820 +GATCTCGCAGGGATCAGTTATCCAGGTATTCCCCCC +>FC0000042:5:1:48:371 +AATCTATAATCTTTACCCGAGTTTAAGTCCCCCCCC +>FC0000042:5:1:1346:1739 +GATATAGGTTATACGTTTTTAGTCTTAGAGAAGTTT +>FC0000042:5:1:661:459 +GATCTGCTTTAACGATTGAGGACGATGCCCCCCCCC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep_results2.html Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,307 @@ +<html> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> +<style type="text/css"> +.ef0,.f0 { color: #000000; } .eb0,.b0 { background-color: #000000; } +.ef1,.f1 { color: #AA0000; } .eb1,.b1 { background-color: #AA0000; } +.ef2,.f2 { color: #00AA00; } .eb2,.b2 { background-color: #00AA00; } +.ef3,.f3 { color: #AA5500; } .eb3,.b3 { background-color: #AA5500; } +.ef4,.f4 { color: #0000AA; } .eb4,.b4 { background-color: #0000AA; } +.ef5,.f5 { color: #AA00AA; } .eb5,.b5 { background-color: #AA00AA; } +.ef6,.f6 { color: #00AAAA; } .eb6,.b6 { background-color: #00AAAA; } +.ef7,.f7 { color: #AAAAAA; } .eb7,.b7 { background-color: #AAAAAA; } +.ef8, .f0 > .bold,.bold > .f0 { color: #555555; font-weight: normal; } +.ef9, .f1 > .bold,.bold > .f1 { color: #FF5555; font-weight: normal; } +.ef10,.f2 > .bold,.bold > .f2 { color: #55FF55; font-weight: normal; } +.ef11,.f3 > .bold,.bold > .f3 { color: #FFFF55; font-weight: normal; } +.ef12,.f4 > .bold,.bold > .f4 { color: #5555FF; font-weight: normal; } +.ef13,.f5 > .bold,.bold > .f5 { color: #FF55FF; font-weight: normal; } +.ef14,.f6 > .bold,.bold > .f6 { color: #55FFFF; font-weight: normal; } +.ef15,.f7 > .bold,.bold > .f7 { color: #FFFFFF; font-weight: normal; } +.eb8 { background-color: #555555; } +.eb9 { background-color: #FF5555; } +.eb10 { background-color: #55FF55; } +.eb11 { background-color: #FFFF55; } +.eb12 { background-color: #5555FF; } +.eb13 { background-color: #FF55FF; } +.eb14 { background-color: #55FFFF; } +.eb15 { background-color: #FFFFFF; } +.ef16 { color: #000000; } .eb16 { background-color: #000000; } +.ef17 { color: #00005f; } .eb17 { background-color: #00005f; } +.ef18 { color: #000087; } .eb18 { background-color: #000087; } +.ef19 { color: #0000af; } .eb19 { background-color: #0000af; } +.ef20 { color: #0000d7; } .eb20 { background-color: #0000d7; } 
+.ef21 { color: #0000ff; } .eb21 { background-color: #0000ff; } +.ef22 { color: #005f00; } .eb22 { background-color: #005f00; } +.ef23 { color: #005f5f; } .eb23 { background-color: #005f5f; } +.ef24 { color: #005f87; } .eb24 { background-color: #005f87; } +.ef25 { color: #005faf; } .eb25 { background-color: #005faf; } +.ef26 { color: #005fd7; } .eb26 { background-color: #005fd7; } +.ef27 { color: #005fff; } .eb27 { background-color: #005fff; } +.ef28 { color: #008700; } .eb28 { background-color: #008700; } +.ef29 { color: #00875f; } .eb29 { background-color: #00875f; } +.ef30 { color: #008787; } .eb30 { background-color: #008787; } +.ef31 { color: #0087af; } .eb31 { background-color: #0087af; } +.ef32 { color: #0087d7; } .eb32 { background-color: #0087d7; } +.ef33 { color: #0087ff; } .eb33 { background-color: #0087ff; } +.ef34 { color: #00af00; } .eb34 { background-color: #00af00; } +.ef35 { color: #00af5f; } .eb35 { background-color: #00af5f; } +.ef36 { color: #00af87; } .eb36 { background-color: #00af87; } +.ef37 { color: #00afaf; } .eb37 { background-color: #00afaf; } +.ef38 { color: #00afd7; } .eb38 { background-color: #00afd7; } +.ef39 { color: #00afff; } .eb39 { background-color: #00afff; } +.ef40 { color: #00d700; } .eb40 { background-color: #00d700; } +.ef41 { color: #00d75f; } .eb41 { background-color: #00d75f; } +.ef42 { color: #00d787; } .eb42 { background-color: #00d787; } +.ef43 { color: #00d7af; } .eb43 { background-color: #00d7af; } +.ef44 { color: #00d7d7; } .eb44 { background-color: #00d7d7; } +.ef45 { color: #00d7ff; } .eb45 { background-color: #00d7ff; } +.ef46 { color: #00ff00; } .eb46 { background-color: #00ff00; } +.ef47 { color: #00ff5f; } .eb47 { background-color: #00ff5f; } +.ef48 { color: #00ff87; } .eb48 { background-color: #00ff87; } +.ef49 { color: #00ffaf; } .eb49 { background-color: #00ffaf; } +.ef50 { color: #00ffd7; } .eb50 { background-color: #00ffd7; } +.ef51 { color: #00ffff; } .eb51 { background-color: #00ffff; } +.ef52 { color: 
#5f0000; } .eb52 { background-color: #5f0000; } +.ef53 { color: #5f005f; } .eb53 { background-color: #5f005f; } +.ef54 { color: #5f0087; } .eb54 { background-color: #5f0087; } +.ef55 { color: #5f00af; } .eb55 { background-color: #5f00af; } +.ef56 { color: #5f00d7; } .eb56 { background-color: #5f00d7; } +.ef57 { color: #5f00ff; } .eb57 { background-color: #5f00ff; } +.ef58 { color: #5f5f00; } .eb58 { background-color: #5f5f00; } +.ef59 { color: #5f5f5f; } .eb59 { background-color: #5f5f5f; } +.ef60 { color: #5f5f87; } .eb60 { background-color: #5f5f87; } +.ef61 { color: #5f5faf; } .eb61 { background-color: #5f5faf; } +.ef62 { color: #5f5fd7; } .eb62 { background-color: #5f5fd7; } +.ef63 { color: #5f5fff; } .eb63 { background-color: #5f5fff; } +.ef64 { color: #5f8700; } .eb64 { background-color: #5f8700; } +.ef65 { color: #5f875f; } .eb65 { background-color: #5f875f; } +.ef66 { color: #5f8787; } .eb66 { background-color: #5f8787; } +.ef67 { color: #5f87af; } .eb67 { background-color: #5f87af; } +.ef68 { color: #5f87d7; } .eb68 { background-color: #5f87d7; } +.ef69 { color: #5f87ff; } .eb69 { background-color: #5f87ff; } +.ef70 { color: #5faf00; } .eb70 { background-color: #5faf00; } +.ef71 { color: #5faf5f; } .eb71 { background-color: #5faf5f; } +.ef72 { color: #5faf87; } .eb72 { background-color: #5faf87; } +.ef73 { color: #5fafaf; } .eb73 { background-color: #5fafaf; } +.ef74 { color: #5fafd7; } .eb74 { background-color: #5fafd7; } +.ef75 { color: #5fafff; } .eb75 { background-color: #5fafff; } +.ef76 { color: #5fd700; } .eb76 { background-color: #5fd700; } +.ef77 { color: #5fd75f; } .eb77 { background-color: #5fd75f; } +.ef78 { color: #5fd787; } .eb78 { background-color: #5fd787; } +.ef79 { color: #5fd7af; } .eb79 { background-color: #5fd7af; } +.ef80 { color: #5fd7d7; } .eb80 { background-color: #5fd7d7; } +.ef81 { color: #5fd7ff; } .eb81 { background-color: #5fd7ff; } +.ef82 { color: #5fff00; } .eb82 { background-color: #5fff00; } +.ef83 { color: #5fff5f; } 
.eb83 { background-color: #5fff5f; } +.ef84 { color: #5fff87; } .eb84 { background-color: #5fff87; } +.ef85 { color: #5fffaf; } .eb85 { background-color: #5fffaf; } +.ef86 { color: #5fffd7; } .eb86 { background-color: #5fffd7; } +.ef87 { color: #5fffff; } .eb87 { background-color: #5fffff; } +.ef88 { color: #870000; } .eb88 { background-color: #870000; } +.ef89 { color: #87005f; } .eb89 { background-color: #87005f; } +.ef90 { color: #870087; } .eb90 { background-color: #870087; } +.ef91 { color: #8700af; } .eb91 { background-color: #8700af; } +.ef92 { color: #8700d7; } .eb92 { background-color: #8700d7; } +.ef93 { color: #8700ff; } .eb93 { background-color: #8700ff; } +.ef94 { color: #875f00; } .eb94 { background-color: #875f00; } +.ef95 { color: #875f5f; } .eb95 { background-color: #875f5f; } +.ef96 { color: #875f87; } .eb96 { background-color: #875f87; } +.ef97 { color: #875faf; } .eb97 { background-color: #875faf; } +.ef98 { color: #875fd7; } .eb98 { background-color: #875fd7; } +.ef99 { color: #875fff; } .eb99 { background-color: #875fff; } +.ef100 { color: #878700; } .eb100 { background-color: #878700; } +.ef101 { color: #87875f; } .eb101 { background-color: #87875f; } +.ef102 { color: #878787; } .eb102 { background-color: #878787; } +.ef103 { color: #8787af; } .eb103 { background-color: #8787af; } +.ef104 { color: #8787d7; } .eb104 { background-color: #8787d7; } +.ef105 { color: #8787ff; } .eb105 { background-color: #8787ff; } +.ef106 { color: #87af00; } .eb106 { background-color: #87af00; } +.ef107 { color: #87af5f; } .eb107 { background-color: #87af5f; } +.ef108 { color: #87af87; } .eb108 { background-color: #87af87; } +.ef109 { color: #87afaf; } .eb109 { background-color: #87afaf; } +.ef110 { color: #87afd7; } .eb110 { background-color: #87afd7; } +.ef111 { color: #87afff; } .eb111 { background-color: #87afff; } +.ef112 { color: #87d700; } .eb112 { background-color: #87d700; } +.ef113 { color: #87d75f; } .eb113 { background-color: #87d75f; } +.ef114 { 
color: #87d787; } .eb114 { background-color: #87d787; } +.ef115 { color: #87d7af; } .eb115 { background-color: #87d7af; } +.ef116 { color: #87d7d7; } .eb116 { background-color: #87d7d7; } +.ef117 { color: #87d7ff; } .eb117 { background-color: #87d7ff; } +.ef118 { color: #87ff00; } .eb118 { background-color: #87ff00; } +.ef119 { color: #87ff5f; } .eb119 { background-color: #87ff5f; } +.ef120 { color: #87ff87; } .eb120 { background-color: #87ff87; } +.ef121 { color: #87ffaf; } .eb121 { background-color: #87ffaf; } +.ef122 { color: #87ffd7; } .eb122 { background-color: #87ffd7; } +.ef123 { color: #87ffff; } .eb123 { background-color: #87ffff; } +.ef124 { color: #af0000; } .eb124 { background-color: #af0000; } +.ef125 { color: #af005f; } .eb125 { background-color: #af005f; } +.ef126 { color: #af0087; } .eb126 { background-color: #af0087; } +.ef127 { color: #af00af; } .eb127 { background-color: #af00af; } +.ef128 { color: #af00d7; } .eb128 { background-color: #af00d7; } +.ef129 { color: #af00ff; } .eb129 { background-color: #af00ff; } +.ef130 { color: #af5f00; } .eb130 { background-color: #af5f00; } +.ef131 { color: #af5f5f; } .eb131 { background-color: #af5f5f; } +.ef132 { color: #af5f87; } .eb132 { background-color: #af5f87; } +.ef133 { color: #af5faf; } .eb133 { background-color: #af5faf; } +.ef134 { color: #af5fd7; } .eb134 { background-color: #af5fd7; } +.ef135 { color: #af5fff; } .eb135 { background-color: #af5fff; } +.ef136 { color: #af8700; } .eb136 { background-color: #af8700; } +.ef137 { color: #af875f; } .eb137 { background-color: #af875f; } +.ef138 { color: #af8787; } .eb138 { background-color: #af8787; } +.ef139 { color: #af87af; } .eb139 { background-color: #af87af; } +.ef140 { color: #af87d7; } .eb140 { background-color: #af87d7; } +.ef141 { color: #af87ff; } .eb141 { background-color: #af87ff; } +.ef142 { color: #afaf00; } .eb142 { background-color: #afaf00; } +.ef143 { color: #afaf5f; } .eb143 { background-color: #afaf5f; } +.ef144 { color: #afaf87; } 
.eb144 { background-color: #afaf87; } +.ef145 { color: #afafaf; } .eb145 { background-color: #afafaf; } +.ef146 { color: #afafd7; } .eb146 { background-color: #afafd7; } +.ef147 { color: #afafff; } .eb147 { background-color: #afafff; } +.ef148 { color: #afd700; } .eb148 { background-color: #afd700; } +.ef149 { color: #afd75f; } .eb149 { background-color: #afd75f; } +.ef150 { color: #afd787; } .eb150 { background-color: #afd787; } +.ef151 { color: #afd7af; } .eb151 { background-color: #afd7af; } +.ef152 { color: #afd7d7; } .eb152 { background-color: #afd7d7; } +.ef153 { color: #afd7ff; } .eb153 { background-color: #afd7ff; } +.ef154 { color: #afff00; } .eb154 { background-color: #afff00; } +.ef155 { color: #afff5f; } .eb155 { background-color: #afff5f; } +.ef156 { color: #afff87; } .eb156 { background-color: #afff87; } +.ef157 { color: #afffaf; } .eb157 { background-color: #afffaf; } +.ef158 { color: #afffd7; } .eb158 { background-color: #afffd7; } +.ef159 { color: #afffff; } .eb159 { background-color: #afffff; } +.ef160 { color: #d70000; } .eb160 { background-color: #d70000; } +.ef161 { color: #d7005f; } .eb161 { background-color: #d7005f; } +.ef162 { color: #d70087; } .eb162 { background-color: #d70087; } +.ef163 { color: #d700af; } .eb163 { background-color: #d700af; } +.ef164 { color: #d700d7; } .eb164 { background-color: #d700d7; } +.ef165 { color: #d700ff; } .eb165 { background-color: #d700ff; } +.ef166 { color: #d75f00; } .eb166 { background-color: #d75f00; } +.ef167 { color: #d75f5f; } .eb167 { background-color: #d75f5f; } +.ef168 { color: #d75f87; } .eb168 { background-color: #d75f87; } +.ef169 { color: #d75faf; } .eb169 { background-color: #d75faf; } +.ef170 { color: #d75fd7; } .eb170 { background-color: #d75fd7; } +.ef171 { color: #d75fff; } .eb171 { background-color: #d75fff; } +.ef172 { color: #d78700; } .eb172 { background-color: #d78700; } +.ef173 { color: #d7875f; } .eb173 { background-color: #d7875f; } +.ef174 { color: #d78787; } .eb174 { 
background-color: #d78787; } +.ef175 { color: #d787af; } .eb175 { background-color: #d787af; } +.ef176 { color: #d787d7; } .eb176 { background-color: #d787d7; } +.ef177 { color: #d787ff; } .eb177 { background-color: #d787ff; } +.ef178 { color: #d7af00; } .eb178 { background-color: #d7af00; } +.ef179 { color: #d7af5f; } .eb179 { background-color: #d7af5f; } +.ef180 { color: #d7af87; } .eb180 { background-color: #d7af87; } +.ef181 { color: #d7afaf; } .eb181 { background-color: #d7afaf; } +.ef182 { color: #d7afd7; } .eb182 { background-color: #d7afd7; } +.ef183 { color: #d7afff; } .eb183 { background-color: #d7afff; } +.ef184 { color: #d7d700; } .eb184 { background-color: #d7d700; } +.ef185 { color: #d7d75f; } .eb185 { background-color: #d7d75f; } +.ef186 { color: #d7d787; } .eb186 { background-color: #d7d787; } +.ef187 { color: #d7d7af; } .eb187 { background-color: #d7d7af; } +.ef188 { color: #d7d7d7; } .eb188 { background-color: #d7d7d7; } +.ef189 { color: #d7d7ff; } .eb189 { background-color: #d7d7ff; } +.ef190 { color: #d7ff00; } .eb190 { background-color: #d7ff00; } +.ef191 { color: #d7ff5f; } .eb191 { background-color: #d7ff5f; } +.ef192 { color: #d7ff87; } .eb192 { background-color: #d7ff87; } +.ef193 { color: #d7ffaf; } .eb193 { background-color: #d7ffaf; } +.ef194 { color: #d7ffd7; } .eb194 { background-color: #d7ffd7; } +.ef195 { color: #d7ffff; } .eb195 { background-color: #d7ffff; } +.ef196 { color: #ff0000; } .eb196 { background-color: #ff0000; } +.ef197 { color: #ff005f; } .eb197 { background-color: #ff005f; } +.ef198 { color: #ff0087; } .eb198 { background-color: #ff0087; } +.ef199 { color: #ff00af; } .eb199 { background-color: #ff00af; } +.ef200 { color: #ff00d7; } .eb200 { background-color: #ff00d7; } +.ef201 { color: #ff00ff; } .eb201 { background-color: #ff00ff; } +.ef202 { color: #ff5f00; } .eb202 { background-color: #ff5f00; } +.ef203 { color: #ff5f5f; } .eb203 { background-color: #ff5f5f; } +.ef204 { color: #ff5f87; } .eb204 { background-color: 
#ff5f87; } +.ef205 { color: #ff5faf; } .eb205 { background-color: #ff5faf; } +.ef206 { color: #ff5fd7; } .eb206 { background-color: #ff5fd7; } +.ef207 { color: #ff5fff; } .eb207 { background-color: #ff5fff; } +.ef208 { color: #ff8700; } .eb208 { background-color: #ff8700; } +.ef209 { color: #ff875f; } .eb209 { background-color: #ff875f; } +.ef210 { color: #ff8787; } .eb210 { background-color: #ff8787; } +.ef211 { color: #ff87af; } .eb211 { background-color: #ff87af; } +.ef212 { color: #ff87d7; } .eb212 { background-color: #ff87d7; } +.ef213 { color: #ff87ff; } .eb213 { background-color: #ff87ff; } +.ef214 { color: #ffaf00; } .eb214 { background-color: #ffaf00; } +.ef215 { color: #ffaf5f; } .eb215 { background-color: #ffaf5f; } +.ef216 { color: #ffaf87; } .eb216 { background-color: #ffaf87; } +.ef217 { color: #ffafaf; } .eb217 { background-color: #ffafaf; } +.ef218 { color: #ffafd7; } .eb218 { background-color: #ffafd7; } +.ef219 { color: #ffafff; } .eb219 { background-color: #ffafff; } +.ef220 { color: #ffd700; } .eb220 { background-color: #ffd700; } +.ef221 { color: #ffd75f; } .eb221 { background-color: #ffd75f; } +.ef222 { color: #ffd787; } .eb222 { background-color: #ffd787; } +.ef223 { color: #ffd7af; } .eb223 { background-color: #ffd7af; } +.ef224 { color: #ffd7d7; } .eb224 { background-color: #ffd7d7; } +.ef225 { color: #ffd7ff; } .eb225 { background-color: #ffd7ff; } +.ef226 { color: #ffff00; } .eb226 { background-color: #ffff00; } +.ef227 { color: #ffff5f; } .eb227 { background-color: #ffff5f; } +.ef228 { color: #ffff87; } .eb228 { background-color: #ffff87; } +.ef229 { color: #ffffaf; } .eb229 { background-color: #ffffaf; } +.ef230 { color: #ffffd7; } .eb230 { background-color: #ffffd7; } +.ef231 { color: #ffffff; } .eb231 { background-color: #ffffff; } +.ef232 { color: #080808; } .eb232 { background-color: #080808; } +.ef233 { color: #121212; } .eb233 { background-color: #121212; } +.ef234 { color: #1c1c1c; } .eb234 { background-color: #1c1c1c; } +.ef235 
{ color: #262626; } .eb235 { background-color: #262626; } +.ef236 { color: #303030; } .eb236 { background-color: #303030; } +.ef237 { color: #3a3a3a; } .eb237 { background-color: #3a3a3a; } +.ef238 { color: #444444; } .eb238 { background-color: #444444; } +.ef239 { color: #4e4e4e; } .eb239 { background-color: #4e4e4e; } +.ef240 { color: #585858; } .eb240 { background-color: #585858; } +.ef241 { color: #626262; } .eb241 { background-color: #626262; } +.ef242 { color: #6c6c6c; } .eb242 { background-color: #6c6c6c; } +.ef243 { color: #767676; } .eb243 { background-color: #767676; } +.ef244 { color: #808080; } .eb244 { background-color: #808080; } +.ef245 { color: #8a8a8a; } .eb245 { background-color: #8a8a8a; } +.ef246 { color: #949494; } .eb246 { background-color: #949494; } +.ef247 { color: #9e9e9e; } .eb247 { background-color: #9e9e9e; } +.ef248 { color: #a8a8a8; } .eb248 { background-color: #a8a8a8; } +.ef249 { color: #b2b2b2; } .eb249 { background-color: #b2b2b2; } +.ef250 { color: #bcbcbc; } .eb250 { background-color: #bcbcbc; } +.ef251 { color: #c6c6c6; } .eb251 { background-color: #c6c6c6; } +.ef252 { color: #d0d0d0; } .eb252 { background-color: #d0d0d0; } +.ef253 { color: #dadada; } .eb253 { background-color: #dadada; } +.ef254 { color: #e4e4e4; } .eb254 { background-color: #e4e4e4; } +.ef255 { color: #eeeeee; } .eb255 { background-color: #eeeeee; } + +.f9 { color: #000000; } +.b9 { background-color: #FFFFFF; } +.f9 > .bold,.bold > .f9, body.f9 > pre > .bold { + /* Bold is heavy black on white, or bright white + depending on the default background */ + color: #000000; + font-weight: bold; +} +.reverse { + /* CSS doesnt support swapping fg and bg colours unfortunately, + so just hardcode something that will look OK on all backgrounds. 
*/ + color: #000000; background-color: #AAAAAA; +} +.underline { text-decoration: underline; } +.line-through { text-decoration: line-through; } +.blink { text-decoration: blink; } + +</style> +</head> + +<body class="f9 b9"> +<pre> +GCTATAG<span class="bold"><span class="f4">AAATGT</span></span>TAACATCGAATGTACATTATAAC +<span class="f6">--</span> +CAGCTAACAATC<span class="bold"><span class="f4">AAGCGT</span></span>TACAGATTAGCCCCCCCC +<span class="f6">--</span> +GAACTTGCGTAACGTACAAAAATGCAAGCA<span class="bold"><span class="f4">AAAAGT</span></span> +<span class="f6">--</span> +GCTCTGTTAATCTAGA<span class="bold"><span class="f4">AAATGT</span></span>GTCTCCCCCCCCCC +<span class="f6">--</span> +<span class="bold"><span class="f4">AATCGT</span></span>ATAGCTCGGGCCGGATACTAGTACACCCCC +<span class="f6">--</span> +GATATAGTGGATAACTAATGCTCCCCCAG<span class="bold"><span class="f4">AACTGT</span></span>T +<span class="f6">--</span> +GAACGGACTATAGCCGGTATCCAAACAT<span class="bold"><span class="f4">AAATGT</span></span>TC +</pre> +</body> +</html>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/head_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,7 @@ +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +Gene Experiment1 +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +Gene Experiment2 +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,13 @@ +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 +chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 +chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 +chr4 995793 996435 FBtr0111046 0 + 7 166 642 +chr4 995793 997931 FBtr0111044 0 + 28 683 2138 +chr4 995793 997931 FBtr0111045 0 + 28 683 2138 +chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 +chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 +chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 +chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 +chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 +chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 +chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 +chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 +chr4 252541 266528 FBtr0300797 0 + 56 1296 13987
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin3.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 +chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 +chr4 995793 996435 FBtr0111046 0 + 5 304 642 +chr4 995793 997931 FBtr0111044 0 + 17 714 2138 +chr4 995793 997931 FBtr0111045 0 + 17 714 2138 +chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,19 @@ +key dataset_1_V7 dataset_1_V8 dataset_1_V9 dataset_2_V7 dataset_2_V8 dataset_2_V9 dataset_3_V7 dataset_3_V8 dataset_3_V9 +FBtr0089116 0 0 0 56 1296 15144 0 0 0 +FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 +FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0111044 28 683 2138 0 0 0 17 714 2138 +FBtr0111045 28 683 2138 0 0 0 17 714 2138 +FBtr0111046 7 166 642 0 0 0 5 304 642 +FBtr0300796 0 0 0 56 1296 14475 0 0 0 +FBtr0300797 0 0 0 56 1296 13987 0 0 0 +FBtr0300798 0 0 0 56 1296 14473 0 0 0 +FBtr0300799 0 0 0 56 1296 14473 0 0 0 +FBtr0300800 0 0 0 56 1296 14475 0 0 0 +FBtr0308086 0 0 0 56 1296 14456 0 0 0 +FBtr0308087 0 0 0 56 1296 14456 0 0 0 +FBtr0308778 266 1527 1722 0 0 0 0 0 0 +FBtr0309803 0 0 0 657 29084 44167 0 0 0 +FBtr0310651 3944 6428 6850 0 0 0 9927 6738 6850
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/recurring_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/recurring_result2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,20 @@ +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_input1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + +chr7 56772 56792 D17003_CTCF_R7 372 + +chr7 56775 56795 D17003_CTCF_R4 207 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_output1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 R6 310 + +chr7 56736 56756 R7 354 + +chr7 56761 56781 R4 220 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_FOOBAR_R6 310 + +chr7 56736 56756 D17003_FOOBAR_R7 354 + +chr7 56761 56781 D17003_FOOBAR_R4 220 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +This is a header line +Lorem ipsum dolor foo sit amet foo, +consectetur adipiscing elit. +Nam foo ut nulla non neque faucibus commodo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +Lorem ipsum dolor bar sit amet foo, +consectetur adipiscing elit. +Nam bar ut nulla non neque faucibus commodo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,2 @@ +Lorem ipsum dolor baz sit amet baz, +Nam baz ut nulla non neque faucibus commodo
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 
184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 
0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +chr10 100 200 feature1 100.01 + +chr20 800 900 feature2 1.1 + +chr2 500 600 feature3 1000.1 + +chr1 300 400 feature4 1.1e-05 + +chr21 300 500 feature5 1.1e2 + +chr15 700 800 feature6 1.1e4 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +Gene Experiment1 +LOC468 3 +CDKN2B 5 +RI3BP 5 +ORN4 1 +POTE15 3 +OR4M2 12 +LOC255 9 +DHX37 8 +CDKN2A 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +Gene Experiment2 +ILKA3 8 +UTJX 3 +HES7 1 +MOUB 3 +LOC255 9 +DHX37 8 +CDKN2A 4
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 
0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 
0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 
531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result3.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +chr1 300 400 feature4 1.1e-05 + +chr20 800 900 feature2 1.1 + +chr10 100 200 feature1 100.01 + +chr21 300 500 feature5 1.1e2 + +chr2 500 600 feature3 1000.1 + +chr15 700 800 feature6 1.1e4 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +f e d c b a +7 6 5 4 3 2 +1 2 3 4 5 6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +a b c d e f +2 3 4 5 6 7 +1 2 3 4 5 6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +a +A +b +b +B +c +d +e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 c +1 d +1 e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results2.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,12 @@ +a +A + +b +b +B + +c + +d + +e
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tac_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 32707032 
32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 116206508 116206563 
CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tac_result2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,66 @@ + + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 32707032 
32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 116206508 116206563 
CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,56 @@ +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 
CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unfold_column1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,1 @@ +a b 1 2 3 4 5 c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unfold_column_result1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +a b 1 c +a b 2 c +a b 3 c +a b 4 c +a b 5 c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unique_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr18 59406881 59407046 
CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 
CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="gnu_coreutils" version="8.22"> + <repository changeset_revision="ac64dfe4b1fb" name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="gnu_awk" version="4.1.0"> + <repository changeset_revision="52a8268bb49f" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="gnu_grep" version="2.14"> + <repository changeset_revision="42bbeb602ba9" name="package_gnu_grep_2_14" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="gnu_sed" version="4.2.2-sandbox"> + <repository changeset_revision="76cc52f79dd4" name="package_gnu_sed_4_2_2_sandbox" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <set_environment version="1.0"> + <environment_variable action="set_to" name="TP_SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> + </set_environment> + <package name="perl" version="5.18.1"> + <repository changeset_revision="114b6af405fa" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="text_processing_perl_packages" version="1.0"> + <install version="1.0"> + <actions> + <action type="setup_perl_environment"> + <repository changeset_revision="114b6af405fa" name="package_perl_5_18" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"> + <package name="perl" version="5.18.1" /> + </repository> + <!-- allow downloading and installing a Perl package from cpan.org--> + <package>https://cpan.metacpan.org/authors/id/S/SA/SALVA/Sort-Key-1.33.tar.gz</package> + </action> + </actions> + </install> + <readme> + Needed perl packages. + </readme> + </package> +</tool_dependency>
#!/usr/bin/env python
"""Unfold one delimited column of a tab-separated file into several rows.

Usage::

    unfold_column.py INFILE COLUMN DELIMITER OUTFILE

INFILE     tab-separated input file
COLUMN     1-based index of the column whose content is split
DELIMITER  string on which the content of COLUMN is split
OUTFILE    file that receives the unfolded rows

Every input row yields one output row per element found in COLUMN; all
other columns are repeated unchanged on each of those rows.
"""

import sys


def unfold_rows(handle, out, column, delimiter):
    """Write the unfolded version of every row in ``handle`` to ``out``.

    handle     iterable of text lines (tab-separated columns)
    out        object with a ``write`` method receiving the result
    column     1-based index of the column to unfold
    delimiter  separator passed to ``str.split`` for that column

    Raises ValueError when ``column`` is smaller than 1.
    """
    if column < 1:
        raise ValueError('column index is 1-based, got %d' % column)
    index = column - 1

    for line in handle:
        # Drop the line terminator *before* splitting.  Otherwise it stays
        # glued to the last column, and unfolding that column (or reading a
        # final line without a newline) emits rows that run into each other.
        cols = line.rstrip('\n').split('\t')

        if index >= len(cols):
            # Row has no such column (e.g. a blank line): nothing to
            # unfold, so pass it through instead of raising IndexError.
            out.write('\t'.join(cols) + '\n')
            continue

        head = cols[:index]
        tail = cols[index + 1:]
        for elem in cols[index].split(delimiter):
            out.write('\t'.join(head + [elem] + tail) + '\n')


def main(argv):
    """Command-line entry point; ``argv`` has the layout of ``sys.argv``."""
    infile = argv[1]
    column = int(argv[2])
    delimiter = argv[3]
    outfile = argv[4]
    # Context managers close both files even if unfolding fails part-way.
    with open(outfile, 'w') as out, open(infile) as handle:
        unfold_rows(handle, out, column, delimiter)


if __name__ == '__main__':
    main(sys.argv)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unfold_column.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,63 @@ +<tool id="tp_unfold_column_tool" name="Unfold" version="@BASE_VERSION@.0"> + <description>columns from a table</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <command interpreter="python"> +<![CDATA[ + unfold_column.py + '${infile}' + $column + "$delimiter" + '${outfile}' +]]> + </command> + <inputs> + <param name="infile" format="tabular" type="data" label="File to unfold" /> + <param name="column" type="data_column" data_ref="infile" accept_default="true" label="Column to use for unfolding" /> + <param name="delimiter" type="select" label="Values in column are delimited by"> + <option value=" ">Whitespace</option> + <option value=".">Dot</option> + <option value=",">Comma</option> + <option value="-">Dash</option> + <option value="_">Underscore</option> + <option value="|">Pipe</option> + </param> + </inputs> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile" /> + </outputs> + <tests> + <test> + <param name="infile" value="unfold_column1.tabular" ftype="tabular"/> + <param name="delimiter" value=" "/> + <param name="column" value="3"/> + <output name="outfile" file="unfold_column_result1.tabular" ftype="tabular"/> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +This tool will unfold one column of your input dataset. + +----- + +Input Example:: + + a b 1,2,3,4,5 c + + +Output Example:: + + a b 1 c + a b 2 c + a b 3 c + a b 4 c + a b 5 c + +@REFERENCES@ +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unsorted_uniq.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,93 @@ +<tool id="tp_sorted_uniq" name="Unique" version="@BASE_VERSION@.0"> + <description>occurrences of each record</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements" /> + <version_command>sort --version | head -n 1</version_command> + <command> +<![CDATA[ + sort -u + $ignore_case + $is_numeric + -t ' ' + #if $adv_opts.adv_opts_selector == "advanced": + -k$adv_opts.column_start,$adv_opts.column_end + #end if + -o "$outfile" + "$infile" +]]> + </command> + <inputs> + <param name="infile" type="data" format="tabular" label="File to scan for unique values" /> + <param name="ignore_case" type="boolean" truevalue="-f" falsevalue="" checked="False" + label="Ignore differences in case when comparing" help="(-f)"/> + <param name="is_numeric" type="boolean" truevalue="-n" falsevalue="" checked="False" + label="Column only contains numeric values" help="(-n)" /> + <conditional name="adv_opts"> + <param name="adv_opts_selector" type="select" label="Advanced Options"> + <option value="basic" selected="True">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic" /> + <when value="advanced"> + <param name="column_start" type="data_column" data_ref="infile" label="Column start" help="Unique on specific column range"/> + <param name="column_end" type="data_column" data_ref="infile" label="Column end" help="Unique on specific column range"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="outfile" format_source="infile" metadata_source="infile"/> + </outputs> + <tests> + <test> + <param name="infile" value="1.bed"/> + <param name="is_numeric" value="True"/> + <param name="ignore_case" value="True"/> + <param name="adv_opts_selector" value="advanced"/> + <param name="column_start" value="2"/> + <param name="column_end" value="3"/> + <output 
name="outfile" file="unique_results1.bed"/> + </test> + </tests> + <help> +<![CDATA[ + .. class:: infomark + +**Syntax** + +This tool returns all unique lines using the 'sort -u' command. It can be used with unsorted files. +If you need additional options, like grouping or counting your unique results, please use the 'Unique lines from sorted file' tool. + +----- + +.. class:: infomark + +The input file needs to be tab separated. Please convert your file if necessary. + +----- + +**Example** + +- Input file:: + + chr1 10 100 gene1 + chr1 105 200 gene2 + chr1 10 100 gene1 + chr2 10 100 gene4 + chr2 1000 1900 gene5 + chr3 15 1656 gene6 + chr2 10 100 gene4 + +- Unique lines will result in:: + + chr1 10 100 gene1 + chr1 105 200 gene2 + chr2 10 100 gene4 + chr2 1000 1900 gene5 + chr3 15 1656 gene6 + +@REFERENCES@ +]]> +</help> +</tool>