Previous changeset 2:397067d90a77 (2014-08-21) |
Commit message:
Uploaded |
added:
submatch.pl submatch.xml |
b |
diff -r 397067d90a77 -r b41c88a53578 submatch.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch.pl Thu Aug 21 15:58:18 2014 -0400 |
[ |
#!/usr/bin/perl
# submatch.pl - keep the lines of a report that contain any entry of a list.
#
# Usage: submatch.pl SEQUENCES REPORT OUTPUT
#
#   SEQUENCES  file with one search string per line
#   REPORT     file whose lines are tested against every search string
#   OUTPUT     receives the matching REPORT lines, de-duplicated and ordered
#              by the external `sort -u` command
#
# Search strings are matched literally (plain substring search), so
# characters such as '.', '*', '[' or '+' in a sequence mean themselves.
use strict;
use warnings;

# Run only when executed as a script, so the helpers below can be loaded
# and exercised on their own.
exit main(@ARGV) unless caller;

# main(SEQUENCES, REPORT, OUTPUT)
# Filters REPORT into OUTPUT and returns the process exit status (0).
# Dies with a readable message when a file cannot be opened or written.
sub main {
    my @args = @_;
    die "Usage: submatch.pl SEQUENCES REPORT OUTPUT\n" if @args < 3;
    my ($list_path, $report_path, $out_path) = @args;

    my $needles = load_needles($list_path);

    open my $report, '<', $report_path
        or die "Cannot read report file '$report_path': $!\n";
    open my $out, '>', $out_path
        or die "Cannot write output file '$out_path': $!\n";

    # Stream the report: each line is written at most once per occurrence,
    # no matter how many search strings it contains.
    while (my $line = <$report>) {
        print {$out} $line if contains_any($line, $needles);
    }

    close $report;
    # Buffered write errors only surface at close time.
    close $out or die "Cannot finish writing '$out_path': $!\n";

    # Optional post-processing: sort and remove duplicate lines with the
    # server-side sort command (list form, so no shell is involved).
    # A failure is reported but not fatal; the unsorted result is kept.
    if (system('sort', '-u', '-o', $out_path, $out_path) != 0) {
        warn "sort -u failed on '$out_path' (status $?); output left unsorted\n";
    }

    return 0;
}

# load_needles(PATH)
# Returns an array reference holding the non-empty lines of PATH with their
# line ending (LF or CRLF) removed. Empty lines are skipped: an empty search
# string would otherwise match every report line.
sub load_needles {
    my ($path) = @_;

    open my $in, '<', $path
        or die "Cannot read sequence list '$path': $!\n";

    my @needles;
    while (my $entry = <$in>) {
        $entry =~ s/\r?\n\z//;
        push @needles, $entry if length $entry;
    }
    close $in;

    return \@needles;
}

# contains_any(LINE, NEEDLES)
# Returns 1 when LINE contains at least one string from the array reference
# NEEDLES as a literal substring, 0 otherwise (including when NEEDLES is
# empty).
sub contains_any {
    my ($line, $needles) = @_;

    for my $needle (@{$needles}) {
        return 1 if index($line, $needle) >= 0;
    }
    return 0;
}
b |
diff -r 397067d90a77 -r b41c88a53578 submatch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch.xml Thu Aug 21 15:58:18 2014 -0400 |
b |
<!--
  Galaxy tool wrapper for submatch.pl.
  Runs: perl submatch.pl <sequences> <report> <output>
  and exposes the result as a tabular dataset.
-->
<tool id="submatch" name="Filter by Substring Match">

  <description>This tool allows for partial sequences to match lines containing a larger, more complete sequence. </description>

  <!-- Dataset paths are single-quoted so the shell passes each one to the
       script as exactly one argument. -->
  <command interpreter="perl">submatch.pl '$sequences' '$report' '$output'</command>

  <inputs>
    <!-- One search string per line. -->
    <param format="tabular" name="sequences" type="data" label="Tabular file containing list to filter on."/>
    <!-- Lines of this file are kept when they match an entry of the list. -->
    <param format="tabular" name="report" type="data" label="Tabular file to be filtered."/>
  </inputs>

  <outputs>
    <!-- Matching report lines, as written by submatch.pl. -->
    <data format="tabular" name="output" />
  </outputs>

  <tests>
    <test>
      <param name="sequences" value="submatch_input.tab"/>
      <param name="report" value="submatch_input2.tab"/>
      <output name="output" file="submatch_output.tab"/>
    </test>
  </tests>

<help>

Each line from the list file is converted to a regular expression of the form .*LINE.*

Lines from the second file that match the expression are returned.

This tool allows for partial sequences to match lines containing a larger more complete sequence.

Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report.

</help>

</tool>