# HG changeset patch # User bgruening # Date 1422535997 18000 # Node ID 5314e5d6f040286c47aee7cdf652579290c3e501 Imported from capsule None diff -r 000000000000 -r 5314e5d6f040 ansi2html.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ansi2html.sh Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,331 @@ +#!/bin/sh + +# Convert ANSI (terminal) colours and attributes to HTML + +# Author: +# http://www.pixelbeat.org/docs/terminal_colours/ +# Examples: +# ls -l --color=always | ansi2html.sh > ls.html +# git show --color | ansi2html.sh > last_change.html +# Generally one can use the `script` util to capture full terminal output. +# Changes: +# V0.1, 24 Apr 2008, Initial release +# V0.2, 01 Jan 2009, Phil Harnish +# Support `git diff --color` output by +# matching ANSI codes that specify only +# bold or background colour. +# P@draigBrady.com +# Support `ls --color` output by stripping +# redundant leading 0s from ANSI codes. +# Support `grep --color=always` by stripping +# unhandled ANSI codes (specifically ^[[K). +# V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ +# Remove cat -v usage which mangled non ascii input. +# Cleanup regular expressions used. +# Support other attributes like reverse, ... +# P@draigBrady.com +# Correctly nest tags (even across lines). +# Add a command line option to use a dark background. +# Strip more terminal control codes. +# V0.4, 17 Sep 2009, P@draigBrady.com +# Handle codes with combined attributes and color. +# Handle isolated attributes with css. +# Strip more terminal control codes. 
+# V0.12, 12 Jul 2011 +# http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh + +if [ "$1" = "--version" ]; then + echo "0.12" && exit +fi + +if [ "$1" = "--help" ]; then + echo "This utility converts ANSI codes in data passed to stdin" >&2 + echo "It has 2 optional parameters:" >&2 + echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 + echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 + exit +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +if [ "$1" = "--palette=solarized" ]; then + # See http://ethanschoonover.com/solarized + P0=073642; P1=D30102; P2=859900; P3=B58900; + P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; + P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; + P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; + shift; +elif [ "$1" = "--palette=solarized-xterm" ]; then + # Above mapped onto the xterm 256 color palette + P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; + P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; + P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; + P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; + shift; +elif [ "$1" = "--palette=tango" ]; then + # Gnome default + P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; + P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; + P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; + P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; + shift; +elif [ "$1" = "--palette=xterm" ]; then + P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; + P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; + P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; + P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; + shift; +else # linux console + P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; + P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; + P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; + P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; + [ "$1" = "--palette=linux" ] && shift +fi + +[ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } + +echo -n " + + + + + + +
+'
+
+# p: sed pattern fragment matching the two-byte CSI introducer (ESC then '[').
+# P: captures, as group 1, a prefix of the line that contains no ° marker,
+#    then matches a ¡ marker followed by the CSI introducer.  It is used by
+#    the SGR-to-HTML sed script later in the pipeline.
+p='\x1b\['        #shortcut to match escape codes
+P="\(^[^°]*\)¡$p" #expression to match prepended codes below
+
+# Handle various xterm control sequences.
+# See /usr/share/doc/xterm-*/ctlseqs.txt
+# First stage of the pipeline: reads stdin and removes control sequences
+# other than CSI sequences ending in m (SGR), and resolves CR and BS.
+sed "
+s#\x1b[^\x1b]*\x1b\\\##g  # strip anything between \e and ST
+s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.)
+
+#handle carriage returns
+# Delete from the start of the line through the last run of CRs that is
+# followed by another character, keeping that character.
+# NOTE(review): inside a bracket expression the dollar sign is literal, so the
+# character after the CR may be anything except a dollar sign; presumably the
+# intent was just a CR that is not at end of line - confirm.
+s#^.*\r\{1,\}\([^$]\)#\1#
+s#\r\$## # strip trailing \r
+
+# strip other non SGR escape sequences
+# in order: BEL; ESC followed by one of ] > = or a backslash plus any digits
+# and semicolons; ESC P + and the five characters after it; any CSI sequence
+# whose terminating character is not m
+s#[\x07]##g
+s#\x1b[]>=\][0-9;]*##g
+s#\x1bP+.\{5\}##g
+s#${p}[0-9;?]*[^0-9;?m]##g
+
+#remove backspace chars and what they're backspacing over
+# (loops until no character-plus-backspace pair is left on the line)
+:rm_bs
+s#[^\x08]\x08##g; t rm_bs
+" |
+
+# Normalize the input before transformation
+sed "
+# escape HTML
+s#\&#\&#g; s#>#\>#g; s#<#\<#g; s#\"#\"#g
+
+# normalize SGR codes a little
+
+# split 256 colors out and mark so that they're not
+# recognised by the following 'split combined' line
+:e
+s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e
+s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g;
+
+:c
+s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c   # split combined
+s#${p}0\([0-7]\)#${p}\1#g                                 #strip leading 0
+s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g                   #bold last (with clr)
+s#${p}m#${p}0m#g                                          #add leading 0 to norm
+
+# undo any 256 color marking
+s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g;
+
+# map 16 color codes to color + bold
+s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g;
+s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g;
+
+# change 'reset' code to a single char, and prepend a single char to
+# other codes so that we can easily do negative matching, as sed
+# does not support look behind expressions etc.
+s#°#\°#g; s#${p}0m#°#g
+s#¡#\¡#g; s#${p}[0-9;]*m#¡&#g
+" |
+
+# Convert SGR sequences to HTML
+# Third pipeline stage: every marked SGR code becomes an opening span tag with
+# a CSS class, and every reset marker becomes as many closing span tags as
+# there are spans currently open.  The sed hold space is used as the counter:
+# it holds one s character per open span, and persists across input lines.
+sed "
+:ansi_to_span # replace ANSI codes with CSS classes
+t ansi_to_span # hack so t commands below only apply to preceding s cmd
+
+/^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code
+
+# common combinations to minimise html (optional)
+s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count
+s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count
+
+s#${P}1m#\1<span class=\"bold\">#;                            t span_count
+s#${P}4m#\1<span class=\"underline\">#;                       t span_count
+s#${P}5m#\1<span class=\"blink\">#;                           t span_count
+s#${P}7m#\1<span class=\"reverse\">#;                         t span_count
+s#${P}9m#\1<span class=\"line-through\">#;                    t span_count
+s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#;                    t span_count
+s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#;                    t span_count
+
+s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#;        t span_count
+s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#;        t span_count
+
+s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes
+
+b # next line of input
+
+# add a corresponding span end flag
+# (swap to the hold space, prepend one s, swap back)
+:span_count
+x; s/^/s/; x
+b ansi_to_span
+
+# replace 'reset code' with correct number of </span> tags
+# Each pass pops one s from the hold space and emits one closing tag in front
+# of the reset marker; once the hold space has no s left the marker itself is
+# removed.
+:span_end
+x
+/^s/ {
+  s/^.//
+  x
+  s#°#</span>°#
+  b span_end
+}
+x
+s#°##
+b ansi_to_span
+" |
+
+# Convert alternative character set
+# Note we convert here, as if we do at start we have to worry about avoiding
+# conversion of SGR codes etc., whereas doing here we only have to
+# avoid conversions of stuff between &...; or <...>
+#
+# Note we could use sed to do this based around:
+#   sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/'
+# However that would be very awkward as we need to only conv some input.
+# The basic scheme that we do in the python script below is:
+#  1. enable transliterate once ¡ char seen
+#  2. disable once µ char seen (may be on diff line to ¡)
+#  3. never transliterate between &; or <> chars
+sed "
+# change 'smacs' and 'rmacs' to a single char so that we can easily do
+# negative matching, as sed does not support look behind expressions etc.
+# Note we don't use ° like above as that's part of the alternate charset.
+# ESC ( 0 becomes the ¡ marker and ESC ( B becomes the µ marker.  A literal µ
+# in the input is turned into an entity first so that it is not taken for
+# the end marker.
+s#\x1b(0#¡#g;
+s#µ#\&micro;#g; s#\x1b(B#µ#g
+" |
+(
+python -c "
+# vim:fileencoding=utf8
+
+# Copy stdin to stdout, replacing characters that appear between a ¡ marker
+# and a µ marker with their line-drawing equivalents, while leaving HTML tags
+# and entities untouched.  The code relies on Python 2 features (the unicode
+# builtin, item assignment on the list returned by range); when the
+# interpreter exits with an error the sed fallback after the closing quote
+# runs instead.
+import sys
+import locale
+encoding=locale.getpreferredencoding()
+
+# old holds the source characters and new holds the replacement at the same
+# position.
+old='abcdefghijklmnopqrstuvwxyz{}\`~'
+new='▒␉␌␍␊°±␤␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·'
+new=unicode(new, 'utf-8')
+# Translation table indexed by code point: identity for 0-127, with the
+# entries for the characters in old overwritten by the matching one from new.
+table=range(128)
+for o,n in zip(old, new): table[ord(o)]=n
+
+(STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3)
+
+# state is the current parser state; last_mode remembers which of STANDARD or
+# ALTERNATIVE to return to once a tag or entity has ended.
+state = STANDARD
+last_mode = STANDARD
+for c in unicode(sys.stdin.read(), encoding):
+  if state == HTML_TAG:
+    # inside a tag: copy through unchanged until the closing bracket
+    if c == '>':
+      state = last_mode
+  elif state == HTML_ENTITY:
+    # inside an entity: copy through unchanged until the semicolon
+    if c == ';':
+      state = last_mode
+  else:
+    if c == '<':
+      state = HTML_TAG
+    elif c == '&':
+      state = HTML_ENTITY
+    elif c == u'¡' and state == STANDARD:
+      # start marker: switch mode and drop the marker (continue skips the write)
+      state = ALTERNATIVE
+      last_mode = ALTERNATIVE
+      continue
+    elif c == u'µ' and state == ALTERNATIVE:
+      # end marker: switch back and drop the marker
+      state = STANDARD
+      last_mode = STANDARD
+      continue
+    elif state == ALTERNATIVE:
+      c = c.translate(table)
+  sys.stdout.write(c.encode(encoding))
+" 2>/dev/null ||
+sed 's/[¡µ]//g' # just strip alternative flag chars
+)
+
+echo "
+ +" diff -r 000000000000 -r 5314e5d6f040 awk.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/awk.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,124 @@ + + with awk + + macros.xml + + + gnu_awk + + awk --version | head -n 1 + + "$outfile" +]]> + + + + + + + + + + + + + $code + + + + + + + + + + + + + +23** will match lines that after subtracting the value of the fourth column from the value of the fifth column, gives value alrger than 23. +- **/AG..AG/** will match lines that contain the regular expression **AG..AG** (meaning the characeters AG followed by any two characeters followed by AG). (This is the way to specify regular expressions on the entire line, similar to GREP.) +- **$7 ~ /A{4}U/** will match lines whose seventh column contains 4 consecutive A's followed by a U. (This is the way to specify regular expressions on a specific field.) +- **10000 < $4 && $4 < 20000** will match lines whose fourth column value is larger than 10,000 but smaller than 20,000 +- If no pattern is specified, all lines match (meaning the **action** part will be executed on all lines). + + +**Action Examples** + +- **{ print }** or **{ print $0 }** will print the entire input line (the line that matched in **pattern**). **$0** is a special marker meaning 'the entire line'. +- **{ print $1, $4, $5 }** will print only the first, fourth and fifth fields of the input line. +- **{ print $4, $5-$4 }** will print the fourth column and the difference between the fifth and fourth column. (If the fourth column was start-position in the input file, and the fifth column was end-position - the output file will contain the start-position, and the length). +- If no action part is specified (not even the curly brackets) - the default action is to print the entire line. + + +**AWK's Regular Expression Syntax** + +The select tool searches the data for lines containing or not containing a match to the given pattern. A Regular Expression is a pattern descibing a certain amount of text. 
+ +- **( ) { } [ ] . * ? + \ ^ $** are all special characters. **\\** can be used to "escape" a special character, allowing that special character to be searched for. +- **^** matches the beginning of a string (but not an internal line). +- **(** .. **)** groups a particular pattern. +- **{** n or n, or n,m **}** specifies an expected number of repetitions of the preceding pattern. + + - **{n}** The preceding item is matched exactly n times. + - **{n,}** The preceding item is matched n or more times. + - **{n,m}** The preceding item is matched at least n times but not more than m times. + +- **[** ... **]** creates a character class. Within the brackets, single characters can be placed. A dash (-) may be used to indicate a range such as **a-z**. +- **.** Matches any single character except a newline. +- ***** The preceding item will be matched zero or more times. +- **?** The preceding item is optional and matched at most once. +- **+** The preceding item will be matched one or more times. +- **^** has two meanings: + - matches the beginning of a line or string. + - indicates negation in a character class. For example, [^...] matches every character except the ones inside brackets. +- **$** matches the end of a line or string. +- **\|** Separates alternate possibilities. 
+ +@REFERENCES@ +]]> + + diff -r 000000000000 -r 5314e5d6f040 cut.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cut.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,259 @@ + + columns from a table + + macros.xml + + + cut --version | head -n 1 + + '${output}' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 easyjoin --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/easyjoin Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,308 @@ +#!/usr/bin/env perl +## EASY Join - +## Join with automatic pre-sorting of both files +## Copyright (C) 2010 A. Gordon (gordon@cshl.edu) +## license: AGPLv3+ +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long qw(:config bundling no_ignore_case_always); +use File::Temp qw/tempfile/; +use POSIX qw(locale_h); + +sub show_help(); +sub show_version(); +sub show_examples(); +sub parse_commandline_options(); +sub sort_file($$$); +sub join_files($$); +sub cleanup_files(@); + + +my $PROGRAM="easyjoin"; +my $VERSION="0.6.1"; + +my $debug=undef; +my $HEADER=undef; +my $IGNORE_CASE=undef; +my $FIELD_SEP=undef; +my $FILE1_KEY_COLUMN=1; +my $FILE2_KEY_COLUMN=1; +my @OUTPUT_SPECIFIERS=(); +my $OUTPUT_FORMAT=undef; +my $EMPTY_FILLER=undef; +my $SORT_BUFFER_SIZE=undef; +my $SORT_TEMP_DIR=undef; +my $input_filename1; +my $input_filename2; + +## +## Program Start +## +$ENV{'LANG'}="C";## "C" locale is critical for sorting and joining correctly +parse_commandline_options(); +my (undef, $tmp_filename1) = tempfile(OPEN=>0); +my (undef, $tmp_filename2) = tempfile(OPEN=>0); +sort_file($input_filename1, $tmp_filename1, $FILE1_KEY_COLUMN); +sort_file($input_filename2, $tmp_filename2, $FILE2_KEY_COLUMN); +my $join_exit_code = join_files($tmp_filename1, $tmp_filename2); +cleanup_files($tmp_filename1, $tmp_filename2); 
+exit($join_exit_code); + +## +## Program end +## + + +sub show_help() +{ +print< + This will show all values (paired and unpared) from both files, + Automatically formatting the columns, and using TAB as field separator. + You can override the empty filler (-e X) on the command line. + + --allh = Short-cut for: + -a 1 -a 2 -o auto -e . -t --header + Same as above, but will also respect the header line from both input files. + +JOIN-OPTIONS: + All of GNU join options are supported. + Run: + join --help + To see all possible joining options. + +SORT-OPTIONS: + The following options are supported for the intermediate sorting step: + + -S SIZE + --buffer-size SIZE = GNU sort's --buffer-size option. + + -T DIR + --temporary-directory DIR = GNU sort's --temporary-directory option. + + Run: + sort --help + To learn about these options. They might improve sorting performances for big files. + +FILE1 FILE2: + The two input files to be sorted, joined. + Unlike GNU join, joining STDIN is not supported. Both files must be real files. + + +NOTE About "--header" and "--auto-format": + The "--header" feature requires GNU coreutils version 8.6 or later. + The "-o auto" feature requires GNU coreutils version 8.10 or later. + +EOF + exit(0); +} + +sub show_version() +{ +print< sub { push @OUTPUT_SPECIFIERS, '-a', $_[1] }, + "e=s" => \$EMPTY_FILLER, + "ignore-case|i" => \$IGNORE_CASE, + "j=i" => sub { $FILE1_KEY_COLUMN = $_[1] ; $FILE2_KEY_COLUMN = $_[1] ; }, + "o=s" => \$OUTPUT_FORMAT, + "t=s" => \$FIELD_SEP, + "v=i" => sub { push @OUTPUT_SPECIFIERS, '-v', $_[1] }, + "1=i" => \$FILE1_KEY_COLUMN, + "2=i" => \$FILE2_KEY_COLUMN, + "debug" => \$debug, + "header" => \$HEADER, + "help" => \&show_help, + "version" => \&show_version, + "examples" => \&show_examples, + "buffer-size|S=s" => \$SORT_BUFFER_SIZE, + "temporary-directory|T=s" => \$SORT_TEMP_DIR, + "all" => sub { + push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2; + $FIELD_SEP = "\t"; + $OUTPUT_FORMAT = "auto"; + $EMPTY_FILLER = "." 
unless defined $EMPTY_FILLER; + }, + "allh" => sub { + push @OUTPUT_SPECIFIERS, "-a", 1, "-a", 2; + $FIELD_SEP = "\t"; + $OUTPUT_FORMAT = "auto"; + $HEADER=1; + $EMPTY_FILLER = "." unless defined $EMPTY_FILLER; + }, + ); + die "$PROGRAM: invalid command-line arguments.\n" unless $rc; + + ## We need two file names to join + my @INPUT_FILES = @ARGV; + die "$PROGRAM: missing operand: two file names to join\n" if (scalar(@INPUT_FILES)<2); + die "$PROGRAM: error: too many files specified (can only join two files)\n" if (scalar(@INPUT_FILES)>2); + die "$PROGRAM: error: input file can't be STDIN, please use a real file name.\n" if $INPUT_FILES[0] eq "-" || $INPUT_FILES[1] eq "-"; + die "$PROGRAM: error: input file 1 '" . $INPUT_FILES[0] . "' not found!" unless -e $INPUT_FILES[0]; + die "$PROGRAM: error: input file 2 '" . $INPUT_FILES[1] . "' not found!" unless -e $INPUT_FILES[1]; + + $input_filename1 = $INPUT_FILES[0]; + $input_filename2 = $INPUT_FILES[1]; +} + +sub sort_file($$$) +{ + my ($input_filename, $output_filename, $key_column) = @_; + + my @SORT_COMMAND; + push @SORT_COMMAND, $HEADER ? "./sort-header" : "sort" ; + push @SORT_COMMAND, "-f" if $IGNORE_CASE; + push @SORT_COMMAND, "-k${key_column},${key_column}" ; + push @SORT_COMMAND, "--buffer-size", $SORT_BUFFER_SIZE if $SORT_BUFFER_SIZE; + push @SORT_COMMAND, "--temporary-directory", $SORT_TEMP_DIR if $SORT_TEMP_DIR; + push @SORT_COMMAND, "--output", $output_filename; + push @SORT_COMMAND, "--debugheader" if $debug && $HEADER; + push @SORT_COMMAND, "-t", $FIELD_SEP if $FIELD_SEP; + push @SORT_COMMAND, $input_filename; + + if ($debug) { + warn "$PROGRAM: Running sort on '$input_filename' => '$output_filename'\n"; + warn "$PROGRAM: Sort command line:\n"; + print STDERR Dumper(\@SORT_COMMAND), "\n"; + } + + my $sort_exit_code=1; + system(@SORT_COMMAND); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'sort': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? 
& 127); + kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n"; + } + else { + $sort_exit_code = ($? >> 8); + } + die "$PROGRAM: Error: 'sort' process failed, exit code $sort_exit_code\n" if $sort_exit_code!=0; +} + +sub join_files($$) +{ + my ($file1, $file2) = @_; + + my @join_command = qw/join/; + push @join_command, "--header" if $HEADER; + push @join_command, "--ignore-case" if $IGNORE_CASE; + push @join_command, "-t", $FIELD_SEP if $FIELD_SEP; + push @join_command, "-1", $FILE1_KEY_COLUMN if $FILE1_KEY_COLUMN; + push @join_command, "-2", $FILE2_KEY_COLUMN if $FILE2_KEY_COLUMN; + push @join_command, "-e", $EMPTY_FILLER if defined $EMPTY_FILLER; + push @join_command, "-o", $OUTPUT_FORMAT if $OUTPUT_FORMAT; + push @join_command, @OUTPUT_SPECIFIERS; + push @join_command, $file1, $file2; + + if ($debug) { + warn "$PROGRAM: Running join on '$file1' and '$file2'\n"; + warn "$PROGRAM: join command line:\n"; + print STDERR Dumper(\@join_command), "\n"; + } + + my $join_exit_code=1; + system(@join_command); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'join': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? & 127); + kill 2, $$ if $signal == 2; ##if join was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'join' child-process died with signal $signal\n"; + } + else { + $join_exit_code = ($? 
>> 8); + } + return $join_exit_code; +} + +sub cleanup_files(@) +{ + my (@files) = @_; + + foreach my $file (@files) { + if ($debug) { + warn "$PROGRAM: debug mode, not deleting temporary file '$file'\n"; + } else { + my $count = unlink $file; + warn "$PROGRAM: Error: failed to delete temporary file '$file': $!\n" if ($count != 1); + } + } +} diff -r 000000000000 -r 5314e5d6f040 easyjoin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/easyjoin.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,115 @@ + + two files + + macros.xml + + + TP_SCRIPT_PATH + + join --version | head -n 1 + + '$output' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 find_and_replace --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/find_and_replace Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,202 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Getopt::Std; + +sub parse_command_line(); +sub build_regex_string(); +sub usage(); + +my $input_file ; +my $output_file; +my $find_pattern ; +my $replace_pattern ; +my $find_complete_words ; +my $find_pattern_is_regex ; +my $find_in_specific_column ; +my $find_case_insensitive ; +my $replace_global ; +my $skip_first_line ; + + +## +## Program Start +## +usage() if @ARGV<2; +parse_command_line(); +my $regex_string = build_regex_string() ; + +# Allow first line to pass without filtering? +if ( $skip_first_line ) { + my $line = <$input_file>; + print $output_file $line ; +} + + +## +## Main loop +## + +## I LOVE PERL (and hate it, at the same time...) +## +## So what's going on with the self-compiling perl code? +## +## 1. The program gets the find-pattern and the replace-pattern from the user (as strings). +## 2. If both the find-pattern and replace-pattern are simple strings (not regex), +## it would be possible to pre-compile a regex (with qr//) and use it in a 's///' +## 3. 
If the find-pattern is a regex but the replace-pattern is a simple text string (with out back-references) +## it is still possible to pre-compile the regex and use it in a 's///' +## However, +## 4. If the replace-pattern contains back-references, pre-compiling is not possible. +## (in perl, you can't precompile a substitute regex). +## See these examples: +## http://www.perlmonks.org/?node_id=84420 +## http://stackoverflow.com/questions/125171/passing-a-regex-substitution-as-a-variable-in-perl +## +## The solution: +## we build the regex string as valid perl code (in 'build_regex()', stored in $regex_string ), +## Then eval() a new perl code that contains the substitution regex as inlined code. +## Gotta love perl! + +my $perl_program ; +if ( $find_in_specific_column ) { + # Find & replace in specific column + + $perl_program = < ) { + chomp ; + my \@columns = split ; + + #not enough columns in this line - skip it + next if ( \@columns < $find_in_specific_column ) ; + + \$columns [ $find_in_specific_column - 1 ] =~ $regex_string ; + + print STDOUT join("\t", \@columns), "\n" ; + } +EOF + +} else { + # Find & replace the entire line + $perl_program = < ) { + $regex_string ; + print STDOUT; + } +EOF +} + + +# The dynamic perl code reads from STDIN and writes to STDOUT, +# so connect these handles (if the user didn't specifiy input / output +# file names, these might be already be STDIN/OUT, so the whole could be a no-op). 
+*STDIN = $input_file ; +*STDOUT = $output_file ; +eval $perl_program ; + + +## +## Program end +## + + +sub parse_command_line() +{ + my %opts ; + getopts('grsiwc:o:', \%opts) or die "$0: Invalid option specified\n"; + + die "$0: missing Find-Pattern argument\n" if (@ARGV==0); + $find_pattern = $ARGV[0]; + die "$0: missing Replace-Pattern argument\n" if (@ARGV==1); + $replace_pattern = $ARGV[1]; + + $find_complete_words = ( exists $opts{w} ) ; + $find_case_insensitive = ( exists $opts{i} ) ; + $skip_first_line = ( exists $opts{s} ) ; + $find_pattern_is_regex = ( exists $opts{r} ) ; + $replace_global = ( exists $opts{g} ) ; + + # Search in specific column ? + if ( defined $opts{c} ) { + $find_in_specific_column = $opts{c}; + + die "$0: invalid column number ($find_in_specific_column).\n" + unless $find_in_specific_column =~ /^\d+$/ ; + + die "$0: invalid column number ($find_in_specific_column).\n" + if $find_in_specific_column <= 0; + } + else { + $find_in_specific_column = 0 ; + } + + # Output File specified (instead of STDOUT) ? + if ( defined $opts{o} ) { + my $filename = $opts{o}; + open $output_file, ">$filename" or die "$0: Failed to create output file '$filename': $!\n" ; + } else { + $output_file = *STDOUT ; + } + + + # Input file Specified (instead of STDIN) ? 
+ if ( @ARGV>2 ) { + my $filename = $ARGV[2]; + open $input_file, "<$filename" or die "$0: Failed to open input file '$filename': $!\n" ; + } else { + $input_file = *STDIN; + } +} + +sub build_regex_string() +{ + my $find_string ; + my $replace_string ; + + if ( $find_pattern_is_regex ) { + $find_string = $find_pattern ; + $replace_string = $replace_pattern ; + } else { + $find_string = quotemeta $find_pattern ; + $replace_string = quotemeta $replace_pattern; + } + + if ( $find_complete_words ) { + $find_string = "\\b($find_string)\\b"; + } + + my $regex_string = "s/$find_string/$replace_string/"; + + $regex_string .= "i" if ( $find_case_insensitive ); + $regex_string .= "g" if ( $replace_global ) ; + + + return $regex_string; +} + +sub usage() +{ +print < + parts of text + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 grep.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/grep.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,165 @@ + + (grep) + + macros.xml + + + gnu_grep + TP_SCRIPT_PATH + + grep --version | head -n 1 + + "${output}" + #else: + grep + -P + -A $lines_after + -B $lines_before + $invert + $case_sensitive + -- "${url_paste}" + '${infile}' | grep -v "^--$" > "${output}" + #end if + + ##grep_wrapper.sh '$infile' '$output' '$url_paste' $color -A $lines_after -B $lines_before $invert $case_sensitive +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 head.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/head.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,62 @@ + + lines from a dataset (head) + + macros.xml + + + head --version | head -n 1 + + '${outfile}' +]]> + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/macros.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@ + + + + gnu_coreutils + + + + 1.0 + + + + + + + + + + + +`_ + +Assaf Gordon (gordon cshl dot edu) +]]> + + + + + + + diff -r 000000000000 -r 5314e5d6f040 multijoin --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multijoin Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,321 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Getopt::Long qw(:config no_ignore_case); +use Data::Dumper; +use Carp; +use File::Basename; +use Sort::Key::Natural qw(natsort); + +my $version = "0.1.1"; +my $field_sep = "\t"; +my $key_column; +my @values_columns; +my $max_value_column; +my @input_files; +my $input_headers ; +my $output_headers; +my $filler = "0"; +my $filler_string ; +my $ignore_duplicates; +my $debug = 0 ; +my %input_headers; +my $have_file_labels; +my %file_labels; + +sub parse_command_line_parameters(); +sub show_help(); +sub read_input_file($); +sub print_combined_data(); +sub sanitize_filename($); +sub print_output_header(); +sub show_examples(); + +## +## Program Start +## + +parse_command_line_parameters(); + +my %data; +foreach my $file (@input_files) { + read_input_file($file); +} +#print STDERR Dumper(\%input_headers),"\n"; +#print STDERR Dumper(\%data) if $debug; +print_output_header() if $output_headers; +print_combined_data(); + + +## +## Program End +## +sub print_output_header() +{ + my @output = ("key"); + foreach my $file ( @input_files ) { + foreach my $column ( @values_columns ) { + my $column_name = ( exists $input_headers{$file}->{$column} ) ? + $input_headers{$file}->{$column} : + "V$column" ; + + push @output, $file_labels{$file} . "_" . $column_name; + } + } + print join($field_sep,@output),"\n" + or die "Output error: can't write output line: $!\n"; +} + +sub print_combined_data() +{ + my @keys = natsort keys %data ; + + foreach my $key ( @keys ) { + my @outputs; + + foreach my $file (@input_files) { + push @outputs, + (exists $data{$key}->{$file}) ? 
$data{$key}->{$file} : $filler_string; + } + + print join($field_sep,$key,@outputs),"\n" + or die "Output error: can't write output line: $!\n"; + } +} + +sub sanitize_filename($) +{ + my ($filename) = shift or croak "missing file name"; + my $file_ID = basename($filename); + $file_ID =~ s/\.\w+$//; # remove extension + $file_ID =~ s/^[^\w\.\-]+//; + $file_ID =~ s/[^\w\.\-]+$//; + $file_ID =~ s/[^\w\.\-]+/_/g; # sanitize bad characters + return $file_ID; +} + +sub read_input_file($) +{ + my ($filename) = shift or croak "Missing input file name"; + + my @value_indexes = map { $_-1 } @values_columns; #zero-based indexes for value columns + + open FILE, "<", $filename + or die "Error: can't open file '$filename': $!\n"; + + ## Read file's header + if ($input_headers) { + my $line = ; + chomp $line; + my @fields = split $field_sep, $line; + + my $num_input_fields = scalar(@fields); + die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ; + + foreach my $col (@values_columns) { + $input_headers{$filename}->{$col} = $fields[$col-1] ; + } + } + + + ## Read file's data + while ( my $line = ) { + chomp $line; + my @fields = split $field_sep, $line; + + my $num_input_fields = scalar(@fields); + die "Input error: file '$filename' line $. doesn't have enough columns (key column = $key_column, line has only $num_input_fields columns)\n" if $num_input_fields < $key_column ; + die "Input error: file '$filename' line $. doesn't have enough columns (value column = $max_value_column, line has only $num_input_fields columns)\n" if $num_input_fields < $max_value_column ; + + + my $key = $fields[$key_column-1]; + my $value = join($field_sep, @fields[@value_indexes]); + + die "Input error: file '$filename' line $. 
have duplicated key '$key'.\n" + if (exists $data{$key}->{$filename} && !$ignore_duplicates) ; + $data{$key}->{$filename} = $value; + } + close FILE + or die "Error: can't write and close file '$filename': $!\n"; +} + +sub parse_command_line_parameters() +{ + my $values_columns_string; + + my $rc = GetOptions("help" => \&show_help, + "key|k=i" => \$key_column, + "values|v=s" => \$values_columns_string, + "t=s" => \$field_sep, + "in-header" => \$input_headers, + "out-header|h" => \$output_headers, + "H" => sub { $input_headers = 1 ; $output_headers = 1 ; }, + "ignore-dups" => \$ignore_duplicates, + "filler|f=s" => \$filler, + "examples" => \&show_examples, + "labels" => \$have_file_labels, + ); + die "Error: inalid command-line parameters.\n" unless $rc; + + die "Error: missing key column. use --key N. see --help for more details.\n" unless defined $key_column; + die "Error: Invalid key column ($key_column). Must be bigger than zero. see --help for more details.\n" if $key_column <= 0 ; + + die "Error: missing values column. use --values V1,V2,Vn. See --help for more details.\n" unless defined $values_columns_string; + @values_columns = split(/\s*,\s*/, $values_columns_string); + + die "Error: missing values column. use --values N,N,N. see --help for more details.\n" unless scalar(@values_columns)>0; + foreach my $v (@values_columns) { + die "Error: invalid value column ($v), please use only numbers>=1. see --help for more details.\n" + unless $v =~ /^\d+$/ && $v>=1; + + $max_value_column = $v unless defined $max_value_column && $max_value_column>$v; + } + + $filler_string = join($field_sep, map { $filler } @values_columns); + + + if ($have_file_labels) { + ## have file labels - each pair of parameters is a file/label pair. 
+ die "Error: missing input files and labels\n" if scalar(@ARGV)==0; + die "Error: when using --labels, a pair of file names + labels is required (got odd number of argiments)\n" unless scalar(@ARGV)%2==0; + + while (@ARGV) { + my $filename = shift @ARGV; + my $label = shift @ARGV; + $label =~ s/^[^\.\w\-]+//; + $label =~ s/[^\.\w\-]+$//g; + $label =~ s/[^\.\w\-]+/_/g; + + my $file_ID = sanitize_filename($filename); + $file_labels{$filename} = $label; + push @input_files, $filename; + } + } else { + ## no file labels - the rest of the arguments are just file names; + @input_files = @ARGV; + die "Error: missing input files\n" if scalar(@input_files)==0; + die "Error: need more than one input file to join.\n" if scalar(@input_files)==1; + + foreach my $file (@input_files) { + my $file_ID = sanitize_filename($file); + $file_labels{$file} = $file_ID; + } + } + +} + +sub show_help() +{ + print< AAA.txt <== +chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 +chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 +chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 +chr4 995793 996435 FBtr0111046 0 + 7 166 642 +chr4 995793 997931 FBtr0111044 0 + 28 683 2138 +chr4 995793 997931 FBtr0111045 0 + 28 683 2138 +chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 + +==> BBB.txt <== +chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 +chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 +chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 +chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 +chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 +chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 +chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 +chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 +chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 + +==> CCC.txt <== +chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 +chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089231 0 - 9927 
6738 6831 +chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 +chr4 995793 996435 FBtr0111046 0 + 5 304 642 +chr4 995793 997931 FBtr0111044 0 + 17 714 2138 +chr4 995793 997931 FBtr0111045 0 + 17 714 2138 +chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 + +\$ multijoin -h --key 4 --values 7,8,9 *.txt | head -n 10 +key AAA__V7 AAA__V8 AAA__V9 BBB__V7 BBB__V8 BBB__V9 CCC__V7 CCC__V8 CCC__V9 +FBtr0089116 0 0 0 56 1296 15144 0 0 0 +FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 +FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0111044 28 683 2138 0 0 0 17 714 2138 +FBtr0111045 28 683 2138 0 0 0 17 714 2138 +FBtr0111046 7 166 642 0 0 0 5 304 642 +FBtr0300796 0 0 0 56 1296 14475 0 0 0 + + + +EOF + exit(0); +} diff -r 000000000000 -r 5314e5d6f040 multijoin.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multijoin.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,133 @@ + + (combine multiple files) + + macros.xml + + + perl + text_processing_perl_packages + + + '$outfile' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,124 @@ +Galaxy wrappers for common unix text-processing tools +===================================================== + +The initial work was done by Assaf Gordon and Greg Hannon's lab ( http://hannonlab.cshl.edu ) +in Cold Spring Harbor Laboratory ( http://www.cshl.edu ). In late 2013 maintainence and +further development was taken over by Bjoern Gruening. Feel free to contribute any general purpose +text manipulation tool to this repository. 
+ + +Tools: +------ + + * awk - The AWK programming language ( http://www.gnu.org/software/gawk/ ) + * sed - Stream Editor ( http://sed.sf.net ) + * grep - Search files ( http://www.gnu.org/software/grep/ ) + * sort_columns - Sorting every line according to their columns + * GNU Coreutils programs ( http://www.gnu.org/software/coreutils/ ): + + * sort - sort files + * join - join two files, based on common key field. + * cut - keep/discard fields from a file + * unsorted_uniq - keep unique/duplicated lines in a file + * sorted_uniq - keep unique/duplicated lines in a file + * head - keep the first X lines in a file. + * tail - keep the last X lines in a file. + * unfold_column - unfold a column with multiple entities into multiple lines + + +Few improvements over the standard tools: +----------------------------------------- + + * EasyJoin - A Join tool that does not require pre-sorted files ( https://github.com/agordon/filo/blob/scripts/src/scripts/easyjoin ) + * Multi-Join - Join multiple (>2) files ( https://github.com/agordon/filo/blob/scripts/src/scripts/multijoin ) + * Find_and_Replace - Find/Replace text in a line or specific column. + * Grep with Perl syntax - uses grep with Perl-Compatible regular expressions. + * HTML'd Grep - grep text in a file, and produces highlighted HTML output, for easier viewing ( uses https://github.com/agordon/filo/blob/scripts/src/scripts/sort-header ) + + +Requirements: +------------- + + * Coreutils version 8.22 or later. + * AWK version 4.0.1 or later. 
+ * SED version 4.2 *with* a special patch + * Grep with PCRE support + +All dependencies will be installed automatically with the Galaxy `Tool Shed`_ and the following repository: https://toolshed.g2.bx.psu.edu/view/bgruening/text_processing + + +------------------- +NOTE About Security +------------------- + +The included tools are secure (barring unintentional bugs): +The main concern might be executing system commands with awk's "system" and sed's "e" commands, +or reading/writing arbitrary files with awk's redirection and sed's "r/w" commands. +These commands are DISABLED using the "--sandbox" parameter to awk and sed. + +A user trying to run an awk program similar to:: + + BEGIN { system("ls") } + +will get an error (in Galaxy) saying:: + + fatal: 'system' function not allowed in sandbox mode. + +A user trying to run a SED program similar to:: + + 1els + +will get an error (in Galaxy) saying:: + + sed: -e expression #1, char 2: e/r/w commands disabled in sandbox mode + +That being said, if you do find some vulnerability in these tools, please let me know and I'll try to fix it. + +------------ +Installation +------------ + +Should be done via the Galaxy `Tool Shed`_. +Install the following repository: https://toolshed.g2.bx.psu.edu/view/bgruening/text_processing + +.. _`Tool Shed`: http://wiki.galaxyproject.org/Tool%20Shed + + +---- +TODO +---- + + * add shuf, we can remove the random feature from sort and use shuf instead + * move some advanced settings under a conditional, for example the cut tool offers to cut bytes + * cut wrapper has some output conditional magic for interval files, that needs to be checked + * comm wrapper, see the Galaxy default one + * evaluate the join wrappers against the Galaxy ones, maybe we should drop them + + +------- +License +------- + + * Copyright (c) 2009-2013 A. Gordon (gordon cshl dot edu) + * Copyright (c) 2013-2015 B. 
Gruening (bjoern dot gruening gmail dot com) + + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff -r 000000000000 -r 5314e5d6f040 recurring_lines.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/recurring_lines.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,78 @@ + + with recurring lines + + macros.xml + + + yes --version | head -n 1 + +/dev/null | head -n \$times >> $outfile; + #end for +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 replace_text_in_column.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_column.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,135 @@ + + in a specific column + + macros.xml + + + gnu_awk + + awk --version | head -n 1 + + "$outfile" +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 replace_text_in_line.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_text_in_line.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,132 @@ + + in entire line + + macros.xml + + + gnu_sed + + sed --version | head -n 1 + + "$outfile" +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 sed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sed.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,133 @@ + + with sed + + macros.xml + + + gnu_sed + + sed --version | head -n 1 + + '$output' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + $code + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 sort-header --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort-header Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,281 @@ +#!/usr/bin/env perl +## +## Sort-header - wrapper for GNU sort with header-line support +## +## Copyright(C) A. 
Gordon +## license AGPLv3+ +## +use strict; +use warnings; +use Data::Dumper; +use IO::Handle; +use Getopt::Long qw(:config bundling no_ignore_case_always); + +## Forward declarations +sub add_standard_sort_param(@); +sub add_standard_sort_param_value(@); +sub forbidden_sort_param(@); +sub show_help(); +sub show_version(); +sub show_examples(); +sub parse_commandline_options(); +sub reassign_input_output(); +sub process_header_lines(); +sub run_sort(); +sub read_line_non_buffered(); + + +## +## Runtime options +## +my $PROGRAM="sort-header"; +my $VERSION=0.4; + +my $check_only=undef; +my $input_file=undef; +my $output_file=undef; +my $field_separator=undef; +my $header_lines =1 ; +my $debug=undef; +my $sort_exit_code=1; #by default, assume some error + +my @sort_options; + +## +## Program Start +## +parse_commandline_options(); +reassign_input_output(); +process_header_lines(); +run_sort(); +exit($sort_exit_code); +## +## Program End +## + +sub show_examples() +{ +print< ${PROGRAM} can only sort one file, not merge multiple files. + -c -C --check => Currently not supported + --files0-from => Currently not supported + -z --zero-terminated => Currently not supported + +INPUT-FILE: + If INPUT-FILE is not specified, $PROGRAM will use STDIN (just like GNU sort). 
+ +EOF + exit(0); +} + +sub show_version() +{ +print< \&add_standard_sort_param, + "dictionary-order|d" => \&add_standard_sort_param, + "ignore-case|f" => \&add_standard_sort_param, + "general-numeric-sort|g" => \&add_standard_sort_param, + "ignore-nonprinting|i" => \&add_standard_sort_param, + "month-sort|M" => \&add_standard_sort_param, + "human-numeric-sort|h" => \&add_standard_sort_param, + "numeric-sort|n" => \&add_standard_sort_param, + "random-source=s" => \&add_standard_sort_param_value, + "random-sort|R" => \&add_standard_sort_param, + "reverse|r" => \&add_standard_sort_param, + "sort=s" => \&add_standard_sort_param_value, + "version-sort|V" => \&add_standard_sort_param, + + "check|c" => \&forbidden_sort_param, + "C" => \&forbidden_sort_param, + "compress-program=s" => \&add_standard_sort_param_value, + "debug" => \&add_standard_sort_param, + + "files0-from=s" => \&forbidden_sort_param, + + "key|k=s" => \&add_standard_sort_param_value, + "merge|m" => \&forbidden_sort_param, + "batch-size=i" => \&forbidden_sort_param, + + "parallel=i" => \&add_standard_sort_param_value, + + "output|o=s" => \$output_file, + + "stable|s" => \&add_standard_sort_param, + "buffer-size|S=s" => \&add_standard_sort_param_value, + + "field-separator|t=s" => \&add_standard_sort_param_value, + "temporary-directory|T=s" => \&add_standard_sort_param_value, + "unique|u" => \&add_standard_sort_param, + + "zero-terminated|z" => \&forbidden_sort_param, + + "help" => \&show_help, + "version" => \&show_version, + "examples" => \&show_examples, + + "header=i" => \$header_lines, + "debugheader" => \$debug, + ); + + exit 1 unless $rc; + + my @INPUT_FILES = @ARGV; + + die "$PROGRAM: error: invalid number of header lines ($header_lines)\n" unless $header_lines>=0; + die "$PROGRAM: error: Multiple input files specified. 
This program can sort only a signle file.\n" if (scalar(@INPUT_FILES)>1); + $input_file = shift @INPUT_FILES if scalar(@INPUT_FILES)==1; + + if ($debug) { + warn "$PROGRAM: number of header lines = $header_lines\n"; + warn "$PROGRAM: PASS-to-Sort options:\n", Dumper(\@sort_options), "\n"; + } +} + +sub reassign_input_output() +{ + if ($output_file) { + warn "$PROGRAM: Re-assigning STDOUT to '$output_file'\n" if $debug; + open OUTPUT, '>', $output_file or die "$PROGRAM: Error: failed to create output file '$output_file': $!\n"; + STDOUT->fdopen(\*OUTPUT, 'w') or die "$PROGRAM: Error: failed to reassign STDOUT to '$output_file': $!\n"; + } + + + if ($input_file) { + warn "$PROGRAM: Re-assigning STDIN to '$input_file'\n" if $debug; + open INPUT, '<', $input_file or die "$PROGRAM: Error: failed to open input file '$input_file': $!\n"; + STDIN->fdopen(\*INPUT, 'r') or die "$PROGRAM: Error: failed to reassign STDIN to '$input_file': $!\n"; + } +} + +sub process_header_lines() +{ + warn "$PROGRAM: Reading $header_lines header lines...\n" if $debug; + for (my $i=0; $i<$header_lines; $i++) { + my $line = read_line_non_buffered(); + exit unless defined $line; + print $line; + } +} + +sub run_sort() +{ + warn "$PROGRAM: Running GNU sort...\n" if $debug; + system('sort', @sort_options); + if ($? == -1) { + die "$PROGRAM: Error: failed to execute 'sort': $!\n"; + } + elsif ($? & 127) { + my $signal = ($? & 127); + kill 2, $$ if $signal == 2; ##if sort was interrupted (CTRL-C) - just pass it on and commit suicide + die "$PROGRAM: Error: 'sort' child-process died with signal $signal\n"; + } + else { + $sort_exit_code = ($? >> 8); + } +} + + +sub add_standard_sort_param(@) +{ + my ($obj)= @_; + add_standard_sort_param_value($obj, undef); +} + +sub add_standard_sort_param_value(@) +{ + my ($obj,$value)= @_; + + my $option = "" . $obj ; #stringify the optino object, get the option name. + + if (length($option)==1) { + $option = "-" . $option ; + } else { + $option = "--" . 
$option ; + } + push @sort_options, $option ; + push @sort_options, $value if $value; +} + +sub forbidden_sort_param(@) +{ + my ($obj,$value)= @_; + my $option = "" . $obj ; #stringify the optino object, get the option name. + + die "$PROGRAM: Error: option '$option' can not be used with this program. If you must use it, run GNU sort directly. see --help for more details.\n"; +} + +sub read_line_non_buffered() +{ + my $line = ''; + while ( 1 ) { + my $c; + my $rc = sysread STDIN, $c, 1; + die "$PROGRAM: STDIN Read error: $!" unless defined $rc; + return $line if $rc==0 && $line; + return undef if $rc==0 && (!$line); + $line .= $c ; + return $line if ( $c eq "\n"); + } +} + diff -r 000000000000 -r 5314e5d6f040 sort.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,176 @@ + + data in ascending or descending order + + macros.xml + + + gnu_sed + + sort --version | head -n 1 + + 0: + sed -u '${header}'q && + #end if + + sort $unique $ignore_case --stable -t ' ' + + #for $key in $sortkeys: + -k '${key.column}${key.order}${key.style},${key.column}' + #end for + + ) < '${infile}' > '${outfile}' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2M > 3K > 400) +* **Random order**: return lines in random order. + +------ + +**Example - Header line** + +**Input file** (note first line is a header line, should not be sorted):: + + Fruit Color Price + Banana Yellow 4.1 + Avocado Green 8.0 + Apple Red 3.0 + Melon Green 6.1 + +**Sorting** by **numeric order** on column **3**, with **header**, will return:: + + Fruit Color Price + Apple Red 3.0 + Banana Yellow 4.1 + Melon Green 6.1 + Avocado Green 8.0 + + +----- + +**Example - Natural vs. 
Alphabetical sorting** + +Given the following list:: + + chr4 + chr13 + chr1 + chr10 + chr20 + chr2 + +**Alphabetical sort** would produce the following sorted list:: + + chr1 + chr10 + chr13 + chr2 + chr20 + chr4 + +**Natural Sort** would produce the following sorted list:: + + chr1 + chr2 + chr4 + chr10 + chr13 + chr20 + + +.. class:: infomark + +If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs. + +@REFERENCES@ +]]> + + diff -r 000000000000 -r 5314e5d6f040 sort_rows.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sort_rows.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@ + + according to their columns + + macros.xml + + + $outfile +]]> + + + + + + + + + + + + + + + +Convert* + +**What it does** + +That tool sorts each row in a TAB separated file, according to their columns. In other words: It is a sorted reordering of all columns. 
+ +@REFERENCES@ +]]> + + diff -r 000000000000 -r 5314e5d6f040 sorted_uniq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sorted_uniq.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,105 @@ + + assuming sorted input file + + macros.xml + + + gnu_sed + + uniq --version | head -n 1 + + "$outfile" +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 tac.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tac.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,90 @@ + + reverse a file (reverse cat) + + macros.xml + + + tac --version | head -n 1 + + "$outfile" +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 tail.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tail.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,67 @@ + + lines from a dataset (tail) + + macros.xml + + + tail --version | head -n 1 + + '$outfile' +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 test-data/1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - 
+chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 
CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - diff -r 000000000000 -r 5314e5d6f040 test-data/awk1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr10 0.4 +chr1 1.4 +chrM 3e-1 +chr2 1.1e2 +chr15 3.14e-2 +chr15 0.0314 +chr4 0.1 +chr20 0.9 +chr22 +1.3 +chrX -0.3 diff -r 000000000000 -r 5314e5d6f040 test-data/awk_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/awk_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +12.6 chr1 +990 chr2 
+8.1 chr20 +11.7 chr22 diff -r 000000000000 -r 5314e5d6f040 test-data/cut1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit color weight price +apple red 1.4 0.4 +orange orange 1.1 0.2 +banana yellow 0.9 0.35 diff -r 000000000000 -r 5314e5d6f040 test-data/cut_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit weight price +apple 1.4 0.4 +orange 1.1 0.2 +banana 0.9 0.35 diff -r 000000000000 -r 5314e5d6f040 test-data/cut_results2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fruit weight price +apple 1.4 0.4 +orange 1.1 0.2 +banana 0.9 0.35 diff -r 000000000000 -r 5314e5d6f040 test-data/cut_results3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cut_results3.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +fru +app +ora +ban diff -r 000000000000 -r 5314e5d6f040 test-data/easyjoin1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Color +Apple red +Banana yellow +Orange orange +Melon green diff -r 000000000000 -r 5314e5d6f040 test-data/easyjoin2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin2.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +Fruit Price +Orange 7 +Avocado 8 +Apple 4 +Banana 3 diff -r 000000000000 -r 5314e5d6f040 test-data/easyjoin_result1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/easyjoin_result1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +Fruit Color Price +Apple red 4 +Avocado 0 8 +Banana yellow 3 +Melon green 0 +Orange orange 7 diff -r 000000000000 -r 5314e5d6f040 test-data/find_and_replace1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace1.txt Thu Jan 29 07:53:17 2015 -0500 @@ 
-0,0 +1,5 @@ +I have a dream that one day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. +I have a dream today! diff -r 000000000000 -r 5314e5d6f040 test-data/find_and_replace2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 foo chr1 gene +2 bar chr2 luther +3 baz chrMT gene1 diff -r 000000000000 -r 5314e5d6f040 test-data/find_and_replace_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +I have a dream that one great day this nation will rise up, and live out the true meaning of its creed: ‘We hold these truths to be self-evident: that all men are created equal.’ +I have a dream that one great day on the red hills of Georgia the sons of former slaves and the sons of former slave owners will be able to sit down together at a table of brotherhood. +I have a dream that one great day even the state of Mississippi, a state sweltering with the heat of injustice and sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice. +I have a dream that my four little children will one great day live in a nation where they will not be judged by the color of their skin but by the content of their character. 
+I have a dream today! diff -r 000000000000 -r 5314e5d6f040 test-data/find_and_replace_results2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/find_and_replace_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 foo 1 gene +2 bar 2 luther +3 baz MT gene1 diff -r 000000000000 -r 5314e5d6f040 test-data/grep1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,152 @@ +>FC0000042:5:1:220:1502 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:34:1398 +GATCTCAGTCCACCGCTGGGATTAACCTTGCCCCCC +>FC0000042:5:1:164:1396 +TATCTTATAGATATTTCCCTCTATACTAGTGACCCC +>FC0000042:5:1:333:925 +GAGCTTATAGCTTGTTATATACGTCAACCCCCCCCC +>FC0000042:5:1:204:1476 +GTACTTATATAGATACAAAATATGTATAGGATTGTC +>FC0000042:5:1:119:1511 +GATCTGCATGACCTGGGATTTGTTGGACCCCCCCCC +>FC0000042:5:1:202:1487 +CATGTATAGTCTCCAGTCTATACAACAACCCCCCCC +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:627:866 +AATATAGATATGGGACAAAACACATTTAGACCCCCC +>FC0000042:5:1:24:1357 +GATATAATATCAATATCAATCCACGCTTGTTCCCCC +>FC0000042:5:1:187:1492 +TATAGAAGCAGAAGAAACAACCTACTTTCACATGTT +>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:87:1299 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:206:1341 +GATATATAGCAGTGACCACCTCTAAGCCCCCCCCCC +>FC0000042:5:1:144:929 +GCCCTGGCATATTGTCAATATCTTTAAACCCCCCCC +>FC0000042:5:1:662:820 +TGTCTTTTCGATTTTTTTCTTTGCGTCACCCCCCCC +>FC0000042:5:1:53:1507 +GACCTCACTGTGGCATGAATCATACATTCCCCCCCC +>FC0000042:5:1:182:1502 +AATGCTTGGCAAAGCTCAACTTCGTTGCCCCCCCCC +>FC0000042:5:1:194:1423 +GATCCTATAGGTCTCGATTGGTCTTTTATTCTTTTT +>FC0000042:5:1:35:1444 +GCTATAGCACGGCATAGTGCGATACTAGTACCCCCC +>FC0000042:5:1:667:872 +GACTATAGGCGGAATGATAATGTCAAATAAGTAGTT +>FC0000042:5:1:147:1438 +GATCAAGGAGACTAGGGAGGTAGGAGTTACTCCCCC +>FC0000042:5:1:467:510 +GAACCACTATAGTGACATGGAACACGCGTGAACCCC +>FC0000042:5:1:1553:1707 +TATAGTTACCCTACTGGGCCGACGATTCCCTTACGA +>FC0000042:5:1:207:964 
+AATCTATAGATTTTTCTATTATTGTGTCCTCACCCC +>FC0000042:5:1:169:1468 +GCTCTATAGTTCGAGTTACCAAACTCTTCCCCCCCC +>FC0000042:5:1:42:1465 +GCTCTTTAGGTTTGAACCTGTAGACTTGAGGGGCAT +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:221:1465 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:196:1450 +AATATAGTCTATCCAACAAGATGTAACCCCCCCCCC +>FC0000042:5:1:86:1413 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:453:514 +GATATCTTCGTTTTATATTGAAACTGGCCCCCCCCC +>FC0000042:5:1:150:1415 +TATAGGGCCCTGTATGGTTGCTTGACTAGGGGCTGC +>FC0000042:5:1:191:1475 +GATCCATCCCAATCTCTACGATTGAAAGCATCGGGA +>FC0000042:5:1:26:1407 +GTTATAGAGGCGGGAAGGTGAGAATGCCCCCCCCCC +>FC0000042:5:1:107:1407 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:388:780 +GATCTATAGCTTCTTTAGCTTGGAAACTGGTCAGCC +>FC0000042:5:1:223:1535 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:145:783 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:449:876 +GACCATCAATCAGGTGGAAAGCAGGGCCCCCCCCCC +>FC0000042:5:1:212:1325 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:194:1485 +GAACCGAATCCAACCTGTTTCATTCCTCAGATCCCC +>FC0000042:5:1:507:494 +GATCTTATAGAATTTTTGACAACATAAGTTACCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:633:480 +GAGCTGTGTGCATCTGTCCTGAGAGAGGCAAGATTT +>FC0000042:5:1:53:1443 +GTAATGTTATAGCTAGGATTTTGGAGTTTGGTCCTC +>FC0000042:5:1:45:915 +GTATAGCAGCCTAATAAGGAGCTGGGGACCCCCCCC +>FC0000042:5:1:39:1343 +GTTCTATTTTCGATAAAACTGAACCACCCCCCCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:187:1507 +GAACTAATCCTGATTTATACAACGGCTCCCCCCCCC +>FC0000042:5:1:91:1364 +AATTTATAGCCACTCTAATTCCGTTTGGTTCCCCCC +>FC0000042:5:1:1542:1751 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>FC0000042:5:1:146:886 +GATCTACGATGTACCTTACGCCTCCGAGCATCCCCC +>FC0000042:5:1:615:861 +GATCTACATTATAGATAATGAAGTTCCATTTCCCCC +>FC0000042:5:1:52:792 +GATGTGGTATAGAGAGCAATTCGTTGGTTTTGCCCC +>FC0000042:5:1:153:1433 
+GGTCTTTCTATAGAACGGAACGATATATTTTTCCCC +>FC0000042:5:1:540:800 +GAGCGAAAGTGATAGATGGAGGACTATATCTGCCCC +>FC0000042:5:1:160:1344 +GGTGTACTATAGCTATTAAGTCCAATCATGATAATA +>FC0000042:5:1:544:413 +GATCTCTGGAAAATATAAACCGGTGACCCCCCCCCC +>FC0000042:5:1:579:895 +AGTCTCGAATCAATGTATTTCATCGTGGTAATCCCC +>FC0000042:5:1:468:495 +TATTGATGCTCCCTGCCTGAAAGATACCCCCCCCCC +>FC0000042:5:1:383:831 +CTTCATGAATCTACTGTTGGCGTTTATTTTATCTGG +>FC0000042:5:1:112:1416 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>FC0000042:5:1:37:1299 +GATCGTGAGCTCTGTACCGGAAGTTCGTGGCTGCCA +>FC0000042:5:1:205:780 +TATAGTGTTCCACAAAGACTAGGTAACGCTTCATTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC +>FC0000042:5:1:54:1019 +AATCGCAGCATTCTGACACACAGGTTTCGGATGTAC +>FC0000042:5:1:587:867 +TATCTAATGTCATATTTTCAGACAAATTACTAGAAA +>FC0000042:5:1:319:990 +GATTTGTAAATTACTTCGAACATAGAAGTTCCCCCC +>FC0000042:5:1:453:829 +GAACTTACGGCATTAAGTTTAATCTTCAGCCACCCC +>FC0000042:5:1:159:1470 +GATCTGATAGTGTTGCGACGTAAATAAGTCCCCCCC +>FC0000042:5:1:487:820 +GATCTCGCAGGGATCAGTTATCCAGGTATTCCCCCC +>FC0000042:5:1:48:371 +AATCTATAATCTTTACCCGAGTTTAAGTCCCCCCCC +>FC0000042:5:1:1346:1739 +GATATAGGTTATACGTTTTTAGTCTTAGAGAAGTTT +>FC0000042:5:1:661:459 +GATCTGCTTTAACGATTGAGGACGATGCCCCCCCCC diff -r 000000000000 -r 5314e5d6f040 test-data/grep_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +>FC0000042:5:1:182:1434 +GCTATAGAAATGTTAACATCGAATGTACATTATAAC +>FC0000042:5:1:45:1344 +CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC +>FC0000042:5:1:55:1331 +GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT +>FC0000042:5:1:175:1501 +GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC +>FC0000042:5:1:416:938 +AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC +>FC0000042:5:1:46:1501 +GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT +>FC0000042:5:1:33:702 +GAACGGACTATAGCCGGTATCCAAACATAAATGTTC diff -r 000000000000 -r 5314e5d6f040 test-data/grep_results2.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grep_results2.html Thu Jan 
29 07:53:17 2015 -0500 @@ -0,0 +1,307 @@ + + + + + + + +
+GCTATAGAAATGTTAACATCGAATGTACATTATAAC
+--
+CAGCTAACAATCAAGCGTTACAGATTAGCCCCCCCC
+--
+GAACTTGCGTAACGTACAAAAATGCAAGCAAAAAGT
+--
+GCTCTGTTAATCTAGAAAATGTGTCTCCCCCCCCCC
+--
+AATCGTATAGCTCGGGCCGGATACTAGTACACCCCC
+--
+GATATAGTGGATAACTAATGCTCCCCCAGAACTGTT
+--
+GAACGGACTATAGCCGGTATCCAAACATAAATGTTC
+
+ + diff -r 000000000000 -r 5314e5d6f040 test-data/head_results1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/head_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - diff -r 000000000000 -r 5314e5d6f040 test-data/join_input1__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5 diff -r 000000000000 -r 5314e5d6f040 test-data/join_input1__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input1__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,7 @@ +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3 diff -r 000000000000 -r 5314e5d6f040 test-data/join_input2__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +Gene Experiment1 +CDKN2A 4 +CDKN2B 5 +DHX37 8 +LOC255 9 +LOC468 3 +OR4M2 12 +ORN4 1 +POTE15 3 +RI3BP 5 diff -r 000000000000 -r 5314e5d6f040 test-data/join_input2__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_input2__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +Gene Experiment2 +CDKN2A 4 +DHX37 8 +HES7 1 +ILKA3 8 +LOC255 9 +MOUB 3 +UTJX 3 diff -r 000000000000 -r 5314e5d6f040 
test-data/join_output1_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,13 @@ +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r 000000000000 -r 5314e5d6f040 test-data/join_output1_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output1_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r 000000000000 -r 5314e5d6f040 test-data/join_output2_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r 000000000000 -r 5314e5d6f040 test-data/join_output2_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/join_output2_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 
3 diff -r 000000000000 -r 5314e5d6f040 test-data/multijoin1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +chr4 888449 890171 FBtr0308778 0 + 266 1527 1722 +chr4 972167 979017 FBtr0310651 0 - 3944 6428 6850 +chr4 972186 979017 FBtr0089229 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089231 0 - 3944 6428 6831 +chr4 972186 979017 FBtr0089233 0 - 3944 6428 6831 +chr4 995793 996435 FBtr0111046 0 + 7 166 642 +chr4 995793 997931 FBtr0111044 0 + 28 683 2138 +chr4 995793 997931 FBtr0111045 0 + 28 683 2138 +chr4 1034029 1047719 FBtr0089223 0 - 5293 13394 13690 diff -r 000000000000 -r 5314e5d6f040 test-data/multijoin2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,9 @@ +chr4 90286 134453 FBtr0309803 0 + 657 29084 44167 +chr4 251355 266499 FBtr0089116 0 + 56 1296 15144 +chr4 252050 266506 FBtr0308086 0 + 56 1296 14456 +chr4 252050 266506 FBtr0308087 0 + 56 1296 14456 +chr4 252053 266528 FBtr0300796 0 + 56 1296 14475 +chr4 252053 266528 FBtr0300800 0 + 56 1296 14475 +chr4 252055 266528 FBtr0300798 0 + 56 1296 14473 +chr4 252055 266528 FBtr0300799 0 + 56 1296 14473 +chr4 252541 266528 FBtr0300797 0 + 56 1296 13987 diff -r 000000000000 -r 5314e5d6f040 test-data/multijoin3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/multijoin3.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +chr4 972167 979017 FBtr0310651 0 - 9927 6738 6850 +chr4 972186 979017 FBtr0089229 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089231 0 - 9927 6738 6831 +chr4 972186 979017 FBtr0089233 0 - 9927 6738 6831 +chr4 995793 996435 FBtr0111046 0 + 5 304 642 +chr4 995793 997931 FBtr0111044 0 + 17 714 2138 +chr4 995793 997931 FBtr0111045 0 + 17 714 2138 +chr4 1034029 1047719 FBtr0089223 0 - 17646 13536 13690 diff -r 000000000000 -r 5314e5d6f040 test-data/multijoin_result1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/test-data/multijoin_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,19 @@ +key dataset_1_V7 dataset_1_V8 dataset_1_V9 dataset_2_V7 dataset_2_V8 dataset_2_V9 dataset_3_V7 dataset_3_V8 dataset_3_V9 +FBtr0089116 0 0 0 56 1296 15144 0 0 0 +FBtr0089223 5293 13394 13690 0 0 0 17646 13536 13690 +FBtr0089229 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089231 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0089233 3944 6428 6831 0 0 0 9927 6738 6831 +FBtr0111044 28 683 2138 0 0 0 17 714 2138 +FBtr0111045 28 683 2138 0 0 0 17 714 2138 +FBtr0111046 7 166 642 0 0 0 5 304 642 +FBtr0300796 0 0 0 56 1296 14475 0 0 0 +FBtr0300797 0 0 0 56 1296 13987 0 0 0 +FBtr0300798 0 0 0 56 1296 14473 0 0 0 +FBtr0300799 0 0 0 56 1296 14473 0 0 0 +FBtr0300800 0 0 0 56 1296 14475 0 0 0 +FBtr0308086 0 0 0 56 1296 14456 0 0 0 +FBtr0308087 0 0 0 56 1296 14456 0 0 0 +FBtr0308778 266 1527 1722 0 0 0 0 0 0 +FBtr0309803 0 0 0 657 29084 44167 0 0 0 +FBtr0310651 3944 6428 6850 0 0 0 9927 6738 6850 diff -r 000000000000 -r 5314e5d6f040 test-data/recurring_result1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/recurring_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom diff -r 000000000000 -r 5314e5d6f040 test-data/recurring_result2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/recurring_result2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,20 @@ +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +freedom +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over +war is over diff -r 000000000000 -r 5314e5d6f040 test-data/remove_ending_input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_input1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 
220 + +chr7 56772 56792 D17003_CTCF_R7 372 + +chr7 56775 56795 D17003_CTCF_R4 207 + diff -r 000000000000 -r 5314e5d6f040 test-data/remove_ending_output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/remove_ending_output1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r 000000000000 -r 5314e5d6f040 test-data/replace_text_in_column1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r 000000000000 -r 5314e5d6f040 test-data/replace_text_in_column_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_column_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 R6 310 + +chr7 56736 56756 R7 354 + +chr7 56761 56781 R4 220 + diff -r 000000000000 -r 5314e5d6f040 test-data/replace_text_in_line1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_CTCF_R6 310 + +chr7 56736 56756 D17003_CTCF_R7 354 + +chr7 56761 56781 D17003_CTCF_R4 220 + diff -r 000000000000 -r 5314e5d6f040 test-data/replace_text_in_line_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_text_in_line_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +chr7 56632 56652 D17003_FOOBAR_R6 310 + +chr7 56736 56756 D17003_FOOBAR_R7 354 + +chr7 56761 56781 D17003_FOOBAR_R4 220 + diff -r 000000000000 -r 5314e5d6f040 test-data/sed1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +This is a header line +Lorem ipsum dolor foo sit amet foo, +consectetur adipiscing elit. 
+Nam foo ut nulla non neque faucibus commodo diff -r 000000000000 -r 5314e5d6f040 test-data/sed_results1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed_results1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +Lorem ipsum dolor bar sit amet foo, +consectetur adipiscing elit. +Nam bar ut nulla non neque faucibus commodo diff -r 000000000000 -r 5314e5d6f040 test-data/sed_results2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sed_results2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,2 @@ +Lorem ipsum dolor baz sit amet baz, +Nam baz ut nulla non neque faucibus commodo diff -r 000000000000 -r 5314e5d6f040 test-data/sort1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 
NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 
757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 
22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, diff -r 000000000000 -r 5314e5d6f040 test-data/sort2.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +chr10 100 200 feature1 100.01 + +chr20 800 900 feature2 1.1 + +chr2 500 600 feature3 1000.1 + +chr1 300 400 feature4 1.1e-05 + +chr21 300 500 feature5 1.1e2 + +chr15 700 800 feature6 1.1e4 + diff -r 000000000000 -r 5314e5d6f040 test-data/sort_and_join_input2__1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +Gene Experiment1 +LOC468 3 +CDKN2B 5 +RI3BP 5 +ORN4 1 +POTE15 3 +OR4M2 12 +LOC255 9 +DHX37 8 +CDKN2A 4 diff -r 000000000000 -r 5314e5d6f040 test-data/sort_and_join_input2__2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_input2__2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +Gene Experiment2 +ILKA3 8 +UTJX 3 +HES7 1 +MOUB 3 +LOC255 9 +DHX37 8 +CDKN2A 4 diff -r 000000000000 -r 5314e5d6f040 test-data/sort_and_join_output2_1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +Gene Experiment1 Experiment2 +CDKN2A 4 4 +CDKN2B 5 . +DHX37 8 8 +HES7 . 1 +ILKA3 . 8 +LOC255 9 9 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 
3 diff -r 000000000000 -r 5314e5d6f040 test-data/sort_and_join_output2_2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_and_join_output2_2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,11 @@ +Gene Experiment1 Experiment2 +CDKN2B 5 . +HES7 . 1 +ILKA3 . 8 +LOC468 3 . +MOUB . 3 +OR4M2 12 . +ORN4 1 . +POTE15 3 . +RI3BP 5 . +UTJX . 3 diff -r 000000000000 -r 5314e5d6f040 test-data/sort_result1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 
471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116511232 116557294 NM_003391 0 - 
116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, +chrX 152691216 152693487 NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 
0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, diff -r 000000000000 -r 5314e5d6f040 test-data/sort_result2.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,32 @@ +# comment 1 \n\n''" again +# comment 2 **}"''' special +# comment 3 @n/n""" characters +chr1 147962006 147975713 NM_005997 0 - 147962192 147975670 0 6 574,145,177,115,153,160, 0,1543,7859,9048,9340,13547, +chr1 147984101 148035079 BC007833 0 + 147984545 148033414 0 14 529,32,81,131,118,153,300,206,84,49,85,130,46,1668, 0,25695,28767,33118,33695,33998,35644,38005,39629,40577,41402,43885,48367,49310, +chr1 148077485 148111797 NM_002651 0 - 148078400 148111728 0 12 1097,121,133,266,124,105,110,228,228,45,937,77, 0,2081,2472,6871,9907,10257,11604,14199,15637,18274,23636,34235, +chr1 148185113 148187485 NM_002796 0 + 148185136 148187378 0 7 163,207,147,82,117,89,120, 0,416,877,1199,1674,1977,2252, +chr2 118288484 118306183 NM_006773 0 + 118288583 118304530 0 14 184,285,144,136,101,200,115,140,162,153,114,57,178,1796, 0,2765,4970,6482,6971,7183,7468,9890,10261,10768,11590,14270,14610,15903, +chr2 118389378 118390700 BC005078 0 - 118390395 118390500 0 1 1322, 0, +chr2 220108603 220116964 NM_001927 0 + 220108689 220116217 0 9 664,61,96,162,126,221,44,83,789, 0,1718,1874,2118,2451,2963,5400,7286,7572, +chr2 220229182 220233943 NM_024536 0 - 220229609 220233765 0 4 1687,180,574,492, 0,1990,2660,4269, +chr5 131170738 131357870 AF099740 0 - 131311206 131357817 0 31 112,124,120,81,65,40,120,129,61,88,94,79,72,102,144,117,89,73,96,135,135,78,74,52,33,179,100,102,65,115,248, 
0,11593,44117,47607,104668,109739,114675,126366,135488,137518,138009,140437,152389,153373,155388,159269,160793,162981,164403,165577,166119,167611,169501,178260,179675,180901,181658,182260,182953,183706,186884, +chr5 131424245 131426795 NM_000588 0 + 131424298 131426383 0 5 215,42,90,42,535, 0,313,1658,1872,2015, +chr5 131556201 131590458 NM_004199 0 - 131556601 131582218 0 15 471,97,69,66,54,100,71,177,194,240,138,152,97,100,170, 0,2316,2802,5596,6269,11138,11472,15098,16528,17674,21306,24587,25142,25935,34087, +chr5 131621285 131637046 NM_003687 0 + 131621326 131635821 0 7 134,152,82,179,164,118,1430, 0,4915,8770,13221,13609,14097,14331, +chr6 108298214 108386086 NM_007214 0 - 108299600 108385906 0 21 1530,105,99,102,159,174,60,83,148,155,93,133,95,109,51,59,62,113,115,100,304, 0,2490,6246,10831,12670,23164,23520,27331,31052,32526,34311,36130,36365,38609,41028,42398,43048,51479,54500,59097,87568, +chr6 108593954 108616704 NM_003269 0 + 108594662 108615360 0 9 733,146,88,236,147,97,150,106,1507, 0,5400,8778,10445,12037,14265,14749,15488,21243, +chr6 108639410 108689143 NM_152827 0 - 108640045 108688818 0 3 741,125,487, 0,2984,49246, +chr6 108722790 108950942 NM_145315 0 + 108722976 108950321 0 13 325,224,52,102,131,100,59,83,71,101,141,114,750, 0,28931,52094,60760,61796,71339,107102,152319,181970,182297,215317,224802,227402, +chr7 113320332 113924911 AK131266 0 + 113862563 113893433 0 20 285,91,178,90,58,75,138,51,201,178,214,105,88,84,77,102,122,70,164,1124, 0,201692,340175,448290,451999,484480,542213,543265,543478,545201,556083,558358,565876,567599,573029,573245,575738,577123,577946,603455, +chr7 116511232 116557294 NM_003391 0 - 116512159 116556994 0 5 1157,265,278,227,383, 0,20384,37843,43339,45679, +chr7 116713967 116902666 NM_000492 0 + 116714099 116901113 0 27 185,111,109,216,90,164,126,247,93,183,192,95,87,724,129,38,251,80,151,228,101,249,156,90,173,106,1754, 
0,24290,29071,50936,54313,55285,56585,60137,62053,68678,79501,107776,110390,111971,114967,122863,123569,126711,130556,131618,134650,147559,162475,172879,184725,185496,186945, +chr7 116944658 117107512 AF377960 0 - 116945541 116979926 0 23 1129,102,133,64,186,206,179,188,153,100,87,80,96,276,118,255,151,100,204,1654,225,108,173, 0,7364,8850,10413,13893,14398,17435,24259,24615,35177,35359,45901,47221,49781,56405,66857,69787,72208,73597,80474,100111,150555,162681, +chr8 118880786 119193239 NM_000127 0 - 118881131 119192466 0 11 531,172,161,90,96,119,133,120,108,94,1735, 0,5355,7850,13505,19068,20309,23098,30863,36077,37741,310718, +chr9 128763240 128783870 NM_174933 0 + 128764156 128783586 0 12 261,118,74,159,76,48,56,63,129,117,127,370, 0,522,875,5630,12374,12603,15040,15175,18961,19191,20037,20260, +chr9 128787362 128789566 NM_014908 0 - 128787519 128789136 0 1 2204, 0, +chr9 128789530 128848928 NM_015354 0 + 128789552 128848511 0 44 54,55,74,85,81,45,93,120,212,115,201,90,66,120,127,153,127,88,77,115,121,67,129,140,107,207,170,70,68,196,78,86,146,182,201,93,159,138,75,228,132,74,130,594, 0,1491,5075,8652,9254,10312,11104,11317,20808,21702,23060,25462,31564,32908,33566,34851,35204,35595,35776,37202,38860,39111,39891,40349,42422,45499,45827,46675,47158,47621,50453,50840,51474,51926,53831,54186,55119,55619,57449,57605,57947,58352,58541,58804, +chr9 128849867 128870133 NM_020145 0 - 128850516 128869987 0 11 757,241,101,90,24,63,93,134,129,142,209, 0,1071,1736,2085,2635,4201,6376,6736,13056,14247,20057, +chrX 122719582 122773357 NM_001167 0 + 122745047 122766566 0 7 96,909,100,79,43,201,6985, 0,25433,28421,31040,32533,40295,46790, +chrX 152648233 152662158 NM_000425 0 - 152648964 152662138 0 28 963,12,73,135,156,120,174,123,202,116,223,71,198,111,125,157,167,112,144,132,185,112,171,123,203,106,11,100, 0,1436,1545,1951,2390,2653,2889,3156,3367,3772,4717,5122,5424,5868,6066,6370,6629,6909,7588,7871,8124,8456,8858,9125,10220,10660,11296,13825, +chrX 152691216 152693487 
NM_000054 0 + 152691446 152693029 0 3 255,885,664, 0,616,1607, +chrX 152693677 152712545 NM_001666 0 - 152694029 152712503 0 22 586,100,93,184,74,234,106,135,78,61,103,28,85,192,102,222,129,183,63,163,205,109, 0,1693,2066,2364,2635,2794,3129,3323,3545,3752,5323,5647,5841,6032,6401,11455,11778,13249,13719,13987,14227,18759, diff -r 000000000000 -r 5314e5d6f040 test-data/sort_result3.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_result3.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,6 @@ +chr1 300 400 feature4 1.1e-05 + +chr20 800 900 feature2 1.1 + +chr10 100 200 feature1 100.01 + +chr21 300 500 feature5 1.1e2 + +chr2 500 600 feature3 1000.1 + +chr15 700 800 feature6 1.1e4 + diff -r 000000000000 -r 5314e5d6f040 test-data/sort_rows1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +f e d c b a +7 6 5 4 3 2 +1 2 3 4 5 6 diff -r 000000000000 -r 5314e5d6f040 test-data/sort_rows_results1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort_rows_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,4 @@ +a b c d e f +a b c d e f +2 3 4 5 6 7 +1 2 3 4 5 6 diff -r 000000000000 -r 5314e5d6f040 test-data/sorted_uniq1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,8 @@ +a +A +b +b +B +c +d +e diff -r 000000000000 -r 5314e5d6f040 test-data/sorted_uniq_results1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,3 @@ +1 c +1 d +1 e diff -r 000000000000 -r 5314e5d6f040 test-data/sorted_uniq_results2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted_uniq_results2.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,12 @@ +a +A + +b +b +B + +c + +d + +e diff -r 000000000000 -r 5314e5d6f040 test-data/tac_result1.txt --- /dev/null Thu Jan 01 00:00:00 
1970 +0000 +++ b/test-data/tac_result1.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 32707032 32707192 
CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + 
+chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - diff -r 000000000000 -r 5314e5d6f040 test-data/tac_result2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tac_result2.txt Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,66 @@ + + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr22 30939054 30939266 
CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + 
+chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - \ No newline at end of file diff -r 000000000000 -r 5314e5d6f040 test-data/tail_results1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,10 @@ +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - diff -r 000000000000 -r 5314e5d6f040 test-data/tail_results2.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tail_results2.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,56 @@ +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - 
+chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 
CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - diff -r 000000000000 -r 5314e5d6f040 test-data/unfold_column1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unfold_column1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,1 @@ +a b 1 2 3 4 5 c diff -r 000000000000 -r 5314e5d6f040 test-data/unfold_column_result1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/test-data/unfold_column_result1.tabular Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,5 @@ +a b 1 c +a b 2 c +a b 3 c +a b 4 c +a b 5 c diff -r 000000000000 -r 5314e5d6f040 test-data/unique_results1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unique_results1.bed Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,65 @@ +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 
CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 
CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - diff -r 000000000000 -r 5314e5d6f040 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + $REPOSITORY_INSTALL_DIR + + + + + + + + + + + + + https://cpan.metacpan.org/authors/id/S/SA/SALVA/Sort-Key-1.33.tar.gz + + + + + Needed perl packages. + + + diff -r 000000000000 -r 5314e5d6f040 unfold_column.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unfold_column.py Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +import sys + +out = open(sys.argv[4], 'w+') + +with open(sys.argv[1]) as handle: + for line in handle: + cols = line.split('\t') + unfolding_column = int(sys.argv[2]) - 1 + column_content = cols[ unfolding_column ] + for elem in column_content.split( sys.argv[3] ): + out.write( '\t'.join( cols[:unfolding_column] + [elem] + cols[unfolding_column+1:]) ) +out.close() diff -r 000000000000 -r 5314e5d6f040 unfold_column.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unfold_column.xml Thu Jan 29 07:53:17 2015 -0500 @@ -0,0 +1,63 @@ + + columns from a table + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 5314e5d6f040 unsorted_uniq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/unsorted_uniq.xml Thu Jan 29 07:53:17 2015 -0500 @@ 
-0,0 +1,93 @@ + + occurrences of each record + + macros.xml + + + sort --version | head -n 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +