#!/usr/bin/perl
#
# submatch.pl -- write the REPORT lines that match any line of a sequence list.
#
# Recovered from a changeset export that had been collapsed onto one line and
# appears to have lost its angle-bracket content (file-handle reads, XML tags).
# Provenance as recorded in that export:
#   HG changeset patch
#   User alan-blakely
#   Date 1408651098 14400
#   Node ID b41c88a535789b434d7f9a738a592e9bb8f6bab9
#   Parent 397067d90a7782286762cac77a8cb574862c68d2
#   Uploaded
#   diff -r 397067d90a77 -r b41c88a53578 submatch.pl   (new file, 41 lines)
#   diff -r 397067d90a77 -r b41c88a53578 submatch.xml  (new file, 49 lines)
#
# Usage: submatch.pl SEQUENCES REPORT OUTPUT
#   SEQUENCES  file with one pattern per line (each line is used as a regex)
#   REPORT     file whose lines are tested against every pattern
#   OUTPUT     file that receives the matching REPORT lines, de-duplicated
use strict;
use warnings;

die "Usage: $0 SEQUENCES REPORT OUTPUT\n" if @ARGV < 3;
my ($sequences_path, $report_path, $output_path) = @ARGV;

my @patterns = read_patterns($sequences_path);
write_matches($report_path, $output_path, \@patterns);

# Optional: remove duplicate lines using the server-side sort command.
# List-form system() bypasses the shell, so the path is never re-parsed.
# This step is best-effort: a failure is reported but does not abort.
system('sort', '-u', '-o', $output_path, $output_path) == 0
    or warn "sort -u failed on '$output_path' (status $?); "
          . "output was left unsorted\n";

exit 0;

# Read the sequence list at $path and return one compiled regex per
# non-empty line. Dies if the file cannot be read or a line is not a
# valid regular expression.
sub read_patterns {
    my ($path) = @_;

    open my $in, '<', $path
        or die "Cannot open sequence list '$path': $!\n";

    my @compiled;
    while (my $line = <$in>) {
        $line =~ s/\r?\n\z//;    # accept both LF and CRLF line endings

        # An empty pattern makes Perl's match operator silently reuse the
        # last successful pattern, so blank lines are skipped.
        next if $line eq '';

        my $regex = eval { qr/$line/ }
            or die "Invalid pattern at '$path' line $.: $@";
        push @compiled, $regex;
    }
    close $in or die "Cannot close sequence list '$path': $!\n";

    return @compiled;
}

# Return true as soon as $line matches one of the regexes in @$patterns.
sub matches_any {
    my ($line, $patterns) = @_;

    for my $regex (@{$patterns}) {
        return 1 if $line =~ $regex;
    }
    return 0;
}

# Copy every line of $report_path that matches at least one pattern to
# $output_path. Lines are tested with their line ending still attached and
# are written unchanged, except that a final line lacking a newline gets one
# so it cannot be joined to another line.
sub write_matches {
    my ($report_path, $output_path, $patterns) = @_;

    open my $report, '<', $report_path
        or die "Cannot open report '$report_path': $!\n";
    open my $out, '>', $output_path
        or die "Cannot open output '$output_path': $!\n";

    while (my $line = <$report>) {
        next unless matches_any($line, $patterns);
        $line .= "\n" unless $line =~ /\n\z/;
        print {$out} $line
            or die "Cannot write to '$output_path': $!\n";
    }

    close $report or die "Cannot close report '$report_path': $!\n";
    # Buffered write errors surface at close, so this must be checked.
    close $out or die "Cannot close output '$output_path': $!\n";

    return;
}

__END__

=head1 NAME

submatch.pl - select report lines that contain a listed (partial) sequence

=head1 SYNOPSIS

    submatch.pl $sequences $report $output

=head1 DESCRIPTION

This tool allows for partial sequences to match lines containing a larger,
more complete sequence.

Each line from the list file is converted to a regular expression of the
form .*LINE.*

Lines from the second file that match the expression are returned.

Example: a file containing fragment amino acid sequences is used to select
corresponding lines from a peptide report.

=head1 NOTES

The text above is what remains of the accompanying tool wrapper,
submatch.xml; its markup was not recoverable from the changeset export.

=cut