Next changeset 1:9af08aa008de (2014-08-21) |
Commit message:
Uploaded |
added:
submatch/partial.pl~ submatch/pmatch.pl~ submatch/pmatch.xml~ submatch/pmatchbak.pl~ submatch/submatch.pl submatch/submatch.xml submatch/submatch.xml~ |
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/partial.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/partial.pl~ Thu Aug 21 12:48:08 2014 -0400 |
[ |
#!/usr/bin/perl
#
# partial.pl - print the report lines that contain at least one listed fragment.
#
# Usage: partial.pl LIST_FILE REPORT_FILE OUTPUT_FILE
#
#   LIST_FILE    one fragment per line
#   REPORT_FILE  the file to be filtered
#   OUTPUT_FILE  receives, in report order, every REPORT_FILE line that
#                contains any fragment
#
# Fragments are compared as literal substrings, so characters that are special
# in regular expressions ("|", "(", ".", ...) simply match themselves.

use strict;
use warnings;

# Read $path and return its lines without their line terminators.
# Blank lines are dropped: an empty fragment is contained in every line and
# would make the filter return the whole report.
sub read_fragments {
    my ($path) = @_;

    open my $list_fh, '<', $path
        or die "Cannot open list file '$path': $!\n";

    my @fragments;
    while (my $line = <$list_fh>) {
        $line =~ s/[\r\n]+\z//;    # tolerate both Unix and DOS line endings
        push @fragments, $line if length $line;
    }
    close $list_fh;

    return @fragments;
}

# Return 1 when $line contains any string from the array referenced by
# $fragments, 0 otherwise (including when the fragment list is empty).
sub contains_fragment {
    my ($line, $fragments) = @_;

    for my $fragment (@{$fragments}) {
        return 1 if index($line, $fragment) >= 0;
    }
    return 0;
}

# Entry point: takes the three command-line arguments described in the header
# and returns the process exit status.
sub main {
    my @args = @_;
    die "Usage: $0 LIST_FILE REPORT_FILE OUTPUT_FILE\n" if @args < 3;
    my ($list_path, $report_path, $output_path) = @args;

    # The fragment list is loaded once so the report can be streamed in a
    # single pass instead of being re-read for every fragment.
    my @fragments = read_fragments($list_path);

    open my $report_fh, '<', $report_path
        or die "Cannot open report file '$report_path': $!\n";
    open my $out_fh, '>', $output_path
        or die "Cannot open output file '$output_path': $!\n";

    while (my $line = <$report_fh>) {
        next unless contains_fragment($line, \@fragments);
        print {$out_fh} $line
            or die "Cannot write to '$output_path': $!\n";
    }

    close $report_fh;

    # Buffered write errors only surface when the handle is closed.
    close $out_fh
        or die "Cannot close output file '$output_path': $!\n";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded on
# their own (for example by a test).
exit main(@ARGV) unless caller;

1;
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/pmatch.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatch.pl~ Thu Aug 21 12:48:08 2014 -0400 |
[ |
#!/usr/bin/perl
#
# pmatch.pl - keep the report lines that contain at least one listed fragment.
#
# Usage: pmatch.pl LIST_FILE REPORT_FILE OUTPUT_FILE
#
#   LIST_FILE    one fragment per line (for example a partial peptide sequence)
#   REPORT_FILE  the file to be filtered
#   OUTPUT_FILE  receives every REPORT_FILE line containing any fragment,
#                then is sorted and de-duplicated in place by "sort -u"
#
# Fragments are matched as literal substrings, so characters such as "|",
# "(" or "." in the list file match themselves and cannot abort the run
# with an invalid-pattern error.

use strict;
use warnings;

# Read $path and return its lines without their line terminators.
# Blank lines are dropped: an empty fragment is contained in every line and
# would make the filter return the whole report.
sub read_fragments {
    my ($path) = @_;

    open my $list_fh, '<', $path
        or die "Cannot open list file '$path': $!\n";

    my @fragments;
    while (my $line = <$list_fh>) {
        $line =~ s/[\r\n]+\z//;    # tolerate both Unix and DOS line endings
        push @fragments, $line if length $line;
    }
    close $list_fh;

    return @fragments;
}

# Return 1 when $line contains any string from the array referenced by
# $fragments, 0 otherwise (including when the fragment list is empty).
sub contains_fragment {
    my ($line, $fragments) = @_;

    for my $fragment (@{$fragments}) {
        return 1 if index($line, $fragment) >= 0;
    }
    return 0;
}

# Entry point: takes the three command-line arguments described in the header
# and returns the process exit status.
sub main {
    my @args = @_;
    die "Usage: $0 LIST_FILE REPORT_FILE OUTPUT_FILE\n" if @args < 3;
    my ($list_path, $report_path, $output_path) = @args;

    my @fragments = read_fragments($list_path);

    open my $report_fh, '<', $report_path
        or die "Cannot open report file '$report_path': $!\n";
    open my $out_fh, '>', $output_path
        or die "Cannot open output file '$output_path': $!\n";

    # Single pass over the report: a line is written once however many
    # fragments it contains, and the report is never held in memory.
    while (my $line = <$report_fh>) {
        next unless contains_fragment($line, \@fragments);
        print {$out_fh} $line
            or die "Cannot write to '$output_path': $!\n";
    }

    close $report_fh;

    # Buffered write errors only surface at close, and sort must see the
    # complete file.
    close $out_fh
        or die "Cannot close output file '$output_path': $!\n";

    # Sort and de-duplicate in place with the system sort command. The list
    # form of system() bypasses the shell, so the path needs no quoting.
    # A failure here is reported but not fatal: the filtered lines are
    # already in the output file.
    system('sort', '-u', '-o', $output_path, $output_path) == 0
        or warn "sort -u failed on '$output_path' (status $?)\n";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded on
# their own (for example by a test).
exit main(@ARGV) unless caller;

1;
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/pmatch.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatch.xml~ Thu Aug 21 12:48:08 2014 -0400 |
b |
@@ -0,0 +1,59 @@ + +<tool id="pmatch" name="Filter by Partial Match"> + + <description>Filters a tabular file using fragment lines from a second file. </description> + + <command interpreter="perl">pmatch.pl $sequences $report $output </command> + + <inputs> + + <param format="tabular" name="sequences" type="data" label="Tabular file containing list to filter on."/> + <param format="tabular" name="report" type="data" label="Tabular file to be filtered."/> + + </inputs> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + <tests> + <test> + <param name="sequences" value="pmatch_input.tab"/> + <param name="report" value="pmatch_input2.tab"/> + <output name="output" file="pmatch_output.tab"/> + </test> + </tests> + +<help> + + + +Each line from the first file is converted to a regular expression of the form .*LINE.* + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match a larger, more complete sequence. +Example: a file containing peptide sequences is used to select matching lines from a protein database in tabular form. + + +Input File: File to be filtered: + +PLPNVPQPGR >sp|P15450|ABAE_APIME Abaecin MKVVIFIFALLATICAAFAYV<b>PLPNVPQPGR</b>RPFPTFPGQGPFNPKIKWPQGY +PETALCARR >sp|P85829|BP01_APIME Brain peptide MVPVPVHHMADELLRNGPDTVI +EPFKISIHL >sp|P01500|APAM_APIME Apamin OS=Apis mellifera PE=1 SV=2 MISMLRCIYLFLSVILITSYFVTPVMPCNCKA<b>PETALCARR</b>CQQHG + >sp|P85830|DIUX_APIME Diuretic hormone class 2 OS=Apis mellifera PE=1 SV=1 GLDLGLSRGFSGSQAAKHLMGLAAANYAGGP + >sp|P84759|JELL3_APIME Jellein-3 OS=Apis mellifera PE=1 SV=1 <b>EPFKISIHL</b> + +Output: + +>sp|P15450|ABAE_APIME Abaecin MKVVIFIFALLATICAAFAYVPLPNVPQPGRRPFPTFPGQGPFNPKIKWPQGY +>sp|P01500|APAM_APIME Apamin OS=Apis mellifera PE=1 SV=2 MISMLRCIYLFLSVILITSYFVTPVMPCNCKAPETALCARRCQQHG +>sp|P84759|JELL3_APIME Jellein-3 OS=Apis mellifera PE=1 SV=1 EPFKISIHL + + + +</help> + +</tool> |
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/pmatchbak.pl~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/pmatchbak.pl~ Thu Aug 21 12:48:08 2014 -0400 |
[ |
#!/usr/bin/perl
#
# pmatchbak.pl - print the report lines that contain at least one listed
# fragment, using plain substring comparison.
#
# Usage: pmatchbak.pl LIST_FILE REPORT_FILE OUTPUT_FILE
#
#   LIST_FILE    one fragment per line
#   REPORT_FILE  the file to be filtered
#   OUTPUT_FILE  receives, in report order, every REPORT_FILE line that
#                contains any fragment

use strict;
use warnings;

# Read $path and return its lines without their line terminators.
# The terminator has to go: index() compares the whole fragment, so a
# fragment that still ends in "\n" can only ever match at the end of a line.
# Blank lines are dropped because an empty fragment is contained in every
# line and would make the filter return the whole report.
sub read_fragments {
    my ($path) = @_;

    open my $list_fh, '<', $path
        or die "Cannot open list file '$path': $!\n";

    my @fragments;
    while (my $line = <$list_fh>) {
        $line =~ s/[\r\n]+\z//;    # tolerate both Unix and DOS line endings
        push @fragments, $line if length $line;
    }
    close $list_fh;

    return @fragments;
}

# Return 1 when $line contains any string from the array referenced by
# $fragments, 0 otherwise (including when the fragment list is empty).
sub contains_fragment {
    my ($line, $fragments) = @_;

    for my $fragment (@{$fragments}) {
        return 1 if index($line, $fragment) >= 0;
    }
    return 0;
}

# Entry point: takes the three command-line arguments described in the header
# and returns the process exit status.
sub main {
    my @args = @_;
    die "Usage: $0 LIST_FILE REPORT_FILE OUTPUT_FILE\n" if @args < 3;
    my ($list_path, $report_path, $output_path) = @args;

    # Load every fragment first. Reading the report inside a loop over the
    # list would drain the report handle while processing the first fragment
    # and leave nothing for the remaining ones.
    my @fragments = read_fragments($list_path);

    open my $report_fh, '<', $report_path
        or die "Cannot open report file '$report_path': $!\n";
    open my $out_fh, '>', $output_path
        or die "Cannot open output file '$output_path': $!\n";

    while (my $line = <$report_fh>) {
        next unless contains_fragment($line, \@fragments);
        print {$out_fh} $line
            or die "Cannot write to '$output_path': $!\n";
    }

    close $report_fh;

    # Buffered write errors only surface when the handle is closed.
    close $out_fh
        or die "Cannot close output file '$output_path': $!\n";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded on
# their own (for example by a test).
exit main(@ARGV) unless caller;

1;
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.pl Thu Aug 21 12:48:08 2014 -0400 |
[ |
#!/usr/bin/perl
#
# submatch.pl - keep the report lines that contain at least one listed fragment.
#
# Usage: submatch.pl LIST_FILE REPORT_FILE OUTPUT_FILE
#
#   LIST_FILE    one fragment per line (for example a partial peptide sequence)
#   REPORT_FILE  the file to be filtered
#   OUTPUT_FILE  receives every REPORT_FILE line containing any fragment,
#                then is sorted and de-duplicated in place by "sort -u"
#
# Fragments are matched as literal substrings, so characters such as "|",
# "(" or "." in the list file match themselves and cannot abort the run
# with an invalid-pattern error.

use strict;
use warnings;

# Read $path and return its lines without their line terminators.
# Blank lines are dropped: an empty fragment is contained in every line and
# would make the filter return the whole report.
sub read_fragments {
    my ($path) = @_;

    open my $list_fh, '<', $path
        or die "Cannot open list file '$path': $!\n";

    my @fragments;
    while (my $line = <$list_fh>) {
        $line =~ s/[\r\n]+\z//;    # tolerate both Unix and DOS line endings
        push @fragments, $line if length $line;
    }
    close $list_fh;

    return @fragments;
}

# Return 1 when $line contains any string from the array referenced by
# $fragments, 0 otherwise (including when the fragment list is empty).
sub contains_fragment {
    my ($line, $fragments) = @_;

    for my $fragment (@{$fragments}) {
        return 1 if index($line, $fragment) >= 0;
    }
    return 0;
}

# Entry point: takes the three command-line arguments described in the header
# and returns the process exit status.
sub main {
    my @args = @_;
    die "Usage: $0 LIST_FILE REPORT_FILE OUTPUT_FILE\n" if @args < 3;
    my ($list_path, $report_path, $output_path) = @args;

    my @fragments = read_fragments($list_path);

    open my $report_fh, '<', $report_path
        or die "Cannot open report file '$report_path': $!\n";
    open my $out_fh, '>', $output_path
        or die "Cannot open output file '$output_path': $!\n";

    # Single pass over the report: a line is written once however many
    # fragments it contains, and the report is never held in memory.
    while (my $line = <$report_fh>) {
        next unless contains_fragment($line, \@fragments);
        print {$out_fh} $line
            or die "Cannot write to '$output_path': $!\n";
    }

    close $report_fh;

    # Buffered write errors only surface at close, and sort must see the
    # complete file.
    close $out_fh
        or die "Cannot close output file '$output_path': $!\n";

    # Optional: remove duplicate lines using the server-side sort command.
    # The list form of system() bypasses the shell, so the path needs no
    # quoting. A failure here is reported but not fatal: the filtered lines
    # are already in the output file.
    system('sort', '-u', '-o', $output_path, $output_path) == 0
        or warn "sort -u failed on '$output_path' (status $?)\n";

    return 0;
}

# Run only when executed as a script, so the subs above can be loaded on
# their own (for example by a test).
exit main(@ARGV) unless caller;

1;
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.xml Thu Aug 21 12:48:08 2014 -0400 |
b |
@@ -0,0 +1,49 @@ + +<tool id="pmatch" name="Filter by Substring Match"> + + <description>This tool allows for partial sequences to match lines containing a larger, more complete sequence. </description> + + <command interpreter="perl">submatch.pl $sequences $report $output </command> + + <inputs> + + <param format="tabular" name="sequences" type="data" label="Tabular file containing list to filter on."/> + <param format="tabular" name="report" type="data" label="Tabular file to be filtered."/> + + </inputs> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + <tests> + <test> + <param name="sequences" value="submatch_input.tab"/> + <param name="report" value="submatch_input2.tab"/> + <output name="output" file="submatch_output.tab"/> + </test> + </tests> + +<help> + + + +Each line from the list file is converted to a regular expression of the form .*LINE.* + + + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match lines containing a larger, more complete sequence. + +Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report. + + + + + +</help> + +</tool> |
b |
diff -r 000000000000 -r 05e720d0deb5 submatch/submatch.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/submatch/submatch.xml~ Thu Aug 21 12:48:08 2014 -0400 |
b |
@@ -0,0 +1,49 @@ + +<tool id="pmatch" name="Filter by Partial Match"> + + <description>Filters a tabular file using fragment lines from a second file. </description> + + <command interpreter="perl">pmatch.pl $sequences $report $output </command> + + <inputs> + + <param format="tabular" name="sequences" type="data" label="Tabular file containing list to filter on."/> + <param format="tabular" name="report" type="data" label="Tabular file to be filtered."/> + + </inputs> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + <tests> + <test> + <param name="sequences" value="pmatch_input.tab"/> + <param name="report" value="pmatch_input2.tab"/> + <output name="output" file="pmatch_output.tab"/> + </test> + </tests> + +<help> + + + +Each line from the list file is converted to a regular expression of the form .*LINE.* + + + +Lines from the second file that match the expression are returned. + + + + +This tool allows for partial sequences to match lines containing a larger, more complete sequence. + +Example: a file containing fragment amino acid sequences is used to select corresponding lines from a peptide report. + + + + + +</help> + +</tool> |