# HG changeset patch # User alan-blakely # Date 1408639688 14400 # Node ID 05e720d0deb5b587af4addc7b3bf00eca610bf4c Uploaded diff -r 000000000000 -r 05e720d0deb5 submatch/partial.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/partial.pl~ Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,35 @@ +#!/usr/bin/perl -w +use JSON; + +open (SEQ, "<$ARGV[0]"); +open (REP, "<$ARGV[1]"); +open (OUT, ">$ARGV[2]"); + + + +my (%seq_hash, $i, $n); +$i=0; +$n=0; + +while () { + + %seq_hash{$i}=$_; + i++; + +} +close SEQ; + +while ($n < $i){ + while () { + if (/.*$seq_hash{$i}.*/) { + print OUT $1; + + } + } +n++; +} + +close (REP); +close (OUT); + + diff -r 000000000000 -r 05e720d0deb5 submatch/pmatch.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatch.pl~ Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,40 @@ +#!/usr/bin/perl -w +use strict; +my (@seq, @rep, $i, $n, $l, $t); + +open (SEQ, "<$ARGV[0]"); +open (REP, "<$ARGV[1]"); +open (OUT, ">$ARGV[2]"); + + +$i=$n=$l=0; + + + +while (){ + chomp(); + $seq[$i] = $_; + $i++; +} +while (){ + $rep[$n] = $_; + $n++; + +} +while ($l < $i){ + $t=0; + while ($t < $n){ + print OUT $rep[$t] if $rep[$t] =~ /$seq[$l]/; + $t++; + } + $l++; +} + + + +close (SEQ); +close (REP); +close (OUT); + +system("sort", "-u", "-o", "$ARGV[2]", "$ARGV[2]"); + diff -r 000000000000 -r 05e720d0deb5 submatch/pmatch.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatch.xml~ Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,59 @@ + + + + Filters a tabular file using fragment lines from a second file. + + pmatch.pl $sequences $report $output + + + + + + + + + + + + + + + + + + + + + + + +Each line from the first file is converted to a regular expression of the form .*LINE.* + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match a larger more complete sequence. 
+Example: a file containing peptide sequences is used to select matching lines from a protein database in tabular form. + + +Input File: File to be filtered: + +PLPNVPQPGR >sp|P15450|ABAE_APIME Abaecin MKVVIFIFALLATICAAFAYVPLPNVPQPGRRPFPTFPGQGPFNPKIKWPQGY +PETALCARR >sp|P85829|BP01_APIME Brain peptide MVPVPVHHMADELLRNGPDTVI +EPFKISIHL >sp|P01500|APAM_APIME Apamin OS=Apis mellifera PE=1 SV=2 MISMLRCIYLFLSVILITSYFVTPVMPCNCKAPETALCARRCQQHG + >sp|P85830|DIUX_APIME Diuretic hormone class 2 OS=Apis mellifera PE=1 SV=1 GLDLGLSRGFSGSQAAKHLMGLAAANYAGGP + >sp|P84759|JELL3_APIME Jellein-3 OS=Apis mellifera PE=1 SV=1 EPFKISIHL + +Output: + +>sp|P15450|ABAE_APIME Abaecin MKVVIFIFALLATICAAFAYVPLPNVPQPGRRPFPTFPGQGPFNPKIKWPQGY +>sp|P01500|APAM_APIME Apamin OS=Apis mellifera PE=1 SV=2 MISMLRCIYLFLSVILITSYFVTPVMPCNCKAPETALCARRCQQHG +>sp|P84759|JELL3_APIME Jellein-3 OS=Apis mellifera PE=1 SV=1 EPFKISIHL + + + + + + diff -r 000000000000 -r 05e720d0deb5 submatch/pmatchbak.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatchbak.pl~ Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/perl -w +use JSON; + +open (SEQ, "<$ARGV[0]"); +open (REP, "<$ARGV[1]"); +open (OUT, ">$ARGV[2]"); + + + +my ($seq, $reg); + + +while () { + + $seq=$_; + + while () { + if (index($_, $seq) == -1){ + } + else{ + print OUT $_; + } + if (index($_, "NKLEGLEDALQK") != -1){ + print OUT $_; + } + print OUT $seq; + } + +} + + + + +close (SEQ); +close (REP); +close (OUT); + + diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.pl Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,41 @@ +#!/usr/bin/perl -w +use strict; +my (@seq, @rep, $i, $n, $l, $t); + +open (SEQ, "<$ARGV[0]"); +open (REP, "<$ARGV[1]"); +open (OUT, ">$ARGV[2]"); + + +$i=$n=$l=0; + + + +while (){ + chomp(); + $seq[$i] = $_; + $i++; +} +while (){ + $rep[$n] = $_; + $n++; + +} +while ($l < $i){ + $t=0; + while ($t < $n){ + print OUT $rep[$t] if $rep[$t] =~ 
/$seq[$l]/; + $t++; + } + $l++; +} + + + +close (SEQ); +close (REP); +close (OUT); + +#optional: remove duplicate lines using server-side shell command +system("sort", "-u", "-o", "$ARGV[2]", "$ARGV[2]"); + diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.xml Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,49 @@ + + + + This tool allows for partial sequences to match lines containing a larger, more complete sequence. + + submatch.pl $sequences $report $output + + + + + + + + + + + + + + + + + + + + + + + +Each line from the list file is converted to a regular expression of the form .*LINE.* + + + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match lines containing a larger more complete sequence. + +Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report. + + + + + + + + diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.xml~ Thu Aug 21 12:48:08 2014 -0400 @@ -0,0 +1,49 @@ + + + + Filters a tabular file using fragment lines from a second file. + + pmatch.pl $sequences $report $output + + + + + + + + + + + + + + + + + + + + + + + +Each line from the list file is converted to a regular expression of the form .*LINE.* + + + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match lines containing a larger more complete sequence. + +Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report. + + + + + + + +