Mercurial > repos > alan-blakely > filter_by_substring_match
changeset 3:b41c88a53578 draft default tip
Uploaded
author | alan-blakely |
---|---|
date | Thu, 21 Aug 2014 15:58:18 -0400 |
parents | 397067d90a77 |
children | |
files | submatch.pl submatch.xml |
diffstat | 2 files changed, 90 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl
# submatch.pl - keep the lines of a report that match any pattern in a list.
#
# Usage: submatch.pl SEQUENCES REPORT OUTPUT
#
#   SEQUENCES  text file with one pattern per line. Each line is used as an
#              unanchored Perl regular expression, so it matches anywhere
#              inside a report line (substring-style matching).
#   REPORT     text file whose lines are tested against every pattern.
#   OUTPUT     receives each REPORT line matched by at least one pattern.
#              The file is then sorted and de-duplicated in place by the
#              external `sort -u` command.
use strict;
use warnings;

@ARGV == 3
    or die "Usage: $0 SEQUENCES REPORT OUTPUT\n";
my ($seq_file, $rep_file, $out_file) = @ARGV;

# --- Load and precompile the patterns ------------------------------------
open my $seq_fh, '<', $seq_file
    or die "Cannot open '$seq_file' for reading: $!\n";

my @patterns;
while (my $pattern = <$seq_fh>) {
    # Strip the line terminator, including the CR of CRLF files, so that it
    # does not become part of the pattern.
    $pattern =~ s/[\r\n]+\z//;

    # Skip blank lines: an empty pattern makes Perl silently reuse the last
    # successfully matched regex instead of matching on its own.
    next if $pattern eq '';

    # Compile once here rather than once per report line.
    my $regex = eval { qr/$pattern/ }
        or die "Invalid pattern on line $. of '$seq_file': $@";
    push @patterns, $regex;
}
close $seq_fh;

# --- Stream the report, writing every line matched by some pattern -------
open my $rep_fh, '<', $rep_file
    or die "Cannot open '$rep_file' for reading: $!\n";
open my $out_fh, '>', $out_file
    or die "Cannot open '$out_file' for writing: $!\n";

REPORT_LINE:
while (my $line = <$rep_fh>) {
    for my $regex (@patterns) {
        # Match against the raw line (terminator included), as before.
        next unless $line =~ $regex;

        # A final line without a newline must not be glued to later output.
        $line .= "\n" unless $line =~ /\n\z/;

        print {$out_fh} $line
            or die "Cannot write to '$out_file': $!\n";

        # One copy per report line is enough; `sort -u` below would remove
        # any repeats anyway.
        next REPORT_LINE;
    }
}

close $rep_fh;
close $out_fh
    or die "Cannot close '$out_file': $!\n";

# Optional post-processing: sort and remove duplicate lines with the system
# `sort`. List-form system() bypasses the shell. A failure is reported but is
# not fatal, because the filtered output has already been written.
system('sort', '-u', '-o', $out_file, $out_file) == 0
    or warn "sort -u on '$out_file' failed (status $?); output left unsorted\n";
<tool id="submatch" name="Filter by Substring Match">

    <description>This tool allows for partial sequences to match lines containing a larger, more complete sequence.</description>

    <!--
        Runs submatch.pl with three positional arguments:
        the list file, the file to be filtered, and the output file.
        The paths are single-quoted so that the shell passes each one
        to the script as a single argument.
    -->
    <command interpreter="perl">submatch.pl '$sequences' '$report' '$output'</command>

    <inputs>
        <!-- Each line of this file is used as one search pattern. -->
        <param format="tabular" name="sequences" type="data" label="Tabular file containing list to filter on."/>
        <!-- Lines of this file are kept when they match any pattern. -->
        <param format="tabular" name="report" type="data" label="Tabular file to be filtered."/>
    </inputs>

    <outputs>
        <data format="tabular" name="output" />
    </outputs>

    <tests>
        <test>
            <param name="sequences" value="submatch_input.tab"/>
            <param name="report" value="submatch_input2.tab"/>
            <output name="output" file="submatch_output.tab"/>
        </test>
    </tests>

    <help>

Each line from the list file is converted to a regular expression of the form ``.*LINE.*``

Lines from the second file that match the expression are returned.

Because each line is treated as a regular expression, characters such as ``.``, ``*``, ``+``, ``?``, ``(`` and ``[`` in the list file keep their regular-expression meaning.

This tool allows for partial sequences to match lines containing a larger, more complete sequence.

Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report.

    </help>

</tool>