Mercurial > repos > xuebing > sharplabtool
diff tools/unix_tools/word_list_grep.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/unix_tools/word_list_grep.xml Fri Mar 09 19:37:19 2012 -0500 @@ -0,0 +1,106 @@ +<tool id="cshl_word_list_grep" name="Select lines"> +<description>by word list</description> +<command interpreter="perl"> + word_list_grep.pl + #if $searchwhere.choice == "column": + -c $searchwhere.column + #end if + -o $output + $inverse + $caseinsensitive + $wholewords + $skip_first_line + $wordlist + $input +</command> + +<inputs> + <param name="input" format="txt" type="data" label="input file" /> + <param name="wordlist" format="txt" type="data" label="word list file" /> + + + <param name="inverse" type="boolean" checked="false" truevalue="-v" falsevalue="" label="Inverse filter" + help="Report lines NOT matching the word list" /> + + <param name="caseinsensitive" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Case-Insensitive search" + help="" /> + + <param name="wholewords" type="boolean" checked="false" truevalue="-w" falsevalue="" label="find whole-words" + help="ignore partial matches (e.g. 'apple' will not match 'snapple') " /> + + <param name="skip_first_line" type="boolean" checked="false" truevalue="-s" falsevalue="" label="Ignore first line" + help="Select this option if the first line contains column headers. First line will not be filtered. " /> + + <conditional name="searchwhere"> + <param name="choice" type="select" label="Search words in"> + <option value="line" selected="true">entire line</option> + <option value="column">specific column</option> + </param> + + <when value="line"> + </when> + + <when value="column"> + <param name="column" label="in column" type="data_column" data_ref="input" accept_default="true" /> + </when> + </conditional> + +</inputs> + +<outputs> + <data name="output" format="input" metadata_source="input" /> +</outputs> + +<help> +**What it does** + +This tool selects lines that match words from a word list. + +-------- + +**Example** + +Input file (UCSC's rmsk track from dm3):: + + 585 787 66 241 11 chrXHet 2860 3009 -201103 - DNAREP1_DM LINE Penelope 0 594 435 1 + 585 1383 78 220 0 chrXHet 3012 3320 -200792 - DNAREP1_DM LINE Penelope -217 377 2 1 + 585 244 103 0 0 chrXHet 3737 3776 -200336 - DNAREP1_DM LINE Penelope -555 39 1 1 + 585 2270 83 144 0 chrXHet 7907 8426 -195686 + DNAREP1_DM LINE Penelope 1 594 0 1 + 585 617 189 73 68 chrXHet 10466 10671 -193441 + DNAREP1_DM LINE Penelope 368 573 -21 1 + 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 + ... + ... + + +Word list file:: + + STALKER + PROTOP + + + +Output sequence (searching in column 11):: + + 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 + 586 228 162 0 0 chrXHet 181026 181063 -23049 + STALKER4_I LTR Gypsy 9 45 -6485 1 + 585 245 105 26 0 chr3R 41609 41647 -27863406 + PROTOP_B DNA P 507 545 -608 4 + 586 238 91 0 0 chr3R 140224 140257 -27764796 - PROTOP_B DNA P -617 536 504 4 + ... + ... + +( With **find whole-words** not selected, *PROTOP* matched *PROTOP_B*, *STALKER* matched *STALKER4_I* ) + + + + +Output sequence (searching in column 11, and whole-words only):: + + 586 670 90 38 57 chrXHet 168356 168462 -35650 - PROTOP DNA P -459 4021 3918 1 + 586 413 139 70 0 chrXHet 168462 168548 -35564 - PROTOP DNA P -3406 1074 983 1 + 586 1122 71 185 0 chrXHet 173138 173322 -30790 - PROTOP DNA P -4033 447 230 1 + ... + ... + +</help> + +</tool>