# HG changeset patch
# User Yusuf Ali
# Date 1427312107 21600
# Node ID f92e6aff30b78f24a0078dfc0b279c217be00c37
initial commit

diff -r 000000000000 -r f92e6aff30b7 FilterTableByNamesList.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FilterTableByNamesList.xml	Wed Mar 25 13:35:07 2015 -0600
@@ -0,0 +1,25 @@
+
+
+
+ against a list of desired column values
+ echo 1.0.0
+ filter_by_list $case_sensitive $input_table $file_of_names $filtered_output_table $num_header_lines
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool retains lines of an input tabular file that have a column value matching any of the values in the "names" file. This is useful for example to
+report only a subset of an HGVS or BED file corresponding to a set of genes of interest. The names file should have one name per line. If you are looking to
+only match one name, it might be just as easy to use the generic Galaxy tool "Select lines that match an expression".
+
+
+
diff -r 000000000000 -r f92e6aff30b7 filter_by_list
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/filter_by_list	Wed Mar 25 13:35:07 2015 -0600
@@ -0,0 +1,78 @@
+#!/usr/bin/env perl
+
+# Report lines of a tab-delimited file that have, in the selected column, a
+# value from the names (pattern) file.  A column value may hold several names
+# separated by "; "; any one of them matching is enough.
+#
+# Arguments:
+#   0  case-sensitivity flag: a value starting with "t" or "1" (any case)
+#      selects case-sensitive matching, anything else is case-insensitive
+#   1  input table (tab-delimited)
+#   2  names file, one name per line
+#   3  output file for the retained lines
+#   4  number of leading header lines to copy through unfiltered
+#   5  0-based index of the column to test
+#   6  optional output file for the lines that were not retained
+use strict;
+use warnings;
+
+@ARGV == 6 or @ARGV == 7
+  or die "Usage: $0 <case sensitive?> <input.tab> <names file> <output.tab> <num header lines> <column index> [nonmatching output.tab]\n";
+
+my ($case_sensitive, $table_file, $names_file, $out_file,
+    $num_header_lines, $column, $nonmatch_file) = @ARGV;
+
+open(my $names_fh, '<', $names_file)
+  or die "Cannot open $names_file for reading: $!\n";
+my @alts;
+while (my $name = <$names_fh>) {
+    $name =~ s/[\r\n]+\z//;    # accept Unix and DOS line endings
+    next if $name eq '';       # a blank line is not a name
+    push @alts, quotemeta($name);
+}
+close($names_fh);
+
+# A name matches when it is the whole column value or one "; "-separated item
+# of it.  With no names at all, (?!) guarantees that nothing matches.
+my $regex = @alts
+  ? '(?:\A|\t|; )(?:' . join('|', @alts) . ')(?:; |\t|\Z)'
+  : '(?!)';
+my $matcher = $case_sensitive =~ /^[t1]/i ? qr/$regex/ : qr/$regex/i;
+
+open(my $out_fh, '>', $out_file)
+  or die "Cannot open $out_file for writing: $!\n";
+open(my $tab_fh, '<', $table_file)
+  or die "Cannot open $table_file for reading: $!\n";
+my $nonmatch_fh;
+if (defined $nonmatch_file) {
+    open($nonmatch_fh, '>', $nonmatch_file)
+      or die "Cannot open $nonmatch_file for writing: $!\n";
+}
+
+# Lines starting with "#" are always retained when a header count was given
+# (or when the count is the special value -1).
+my $keep_comment_lines = ($num_header_lines > 0 || $num_header_lines == -1);
+for (1 .. $num_header_lines) {
+    my $header_line = <$tab_fh>;
+    last unless defined $header_line;    # table shorter than the header count
+    print $out_fh $header_line;
+    print $nonmatch_fh $header_line if $nonmatch_fh;
+}
+
+while (my $line = <$tab_fh>) {
+    my @fields = split /\t/, $line;
+    my $value = defined $fields[$column] ? $fields[$column] : '';
+    $value =~ s/[\r\n]+\z//;
+    if ($value =~ $matcher or ($keep_comment_lines and $line =~ /^#/)) {
+        print $out_fh $line;
+    }
+    elsif ($nonmatch_fh) {
+        print $nonmatch_fh $line;
+    }
+}
+
+close($tab_fh);
+close($out_fh) or die "Cannot close $out_file: $!\n";
+if ($nonmatch_fh) {
+    close($nonmatch_fh) or die "Cannot close $nonmatch_file: $!\n";
+}