changeset 2:c4fd2ea4f988

Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA)
author Jim Johnson <jj@umn.edu>
date Thu, 13 Nov 2014 14:09:50 -0600
parents e83e0ce8fb68
children 2429b413d90a
files find_in_reference.py find_in_reference.xml
diffstat 2 files changed, 21 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/find_in_reference.py	Wed Aug 13 15:01:33 2014 -0500
+++ b/find_in_reference.py	Thu Nov 13 14:09:50 2014 -0600
@@ -42,12 +42,28 @@
   parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)')
   parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' )
   parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' )
+  parser.add_option( '-B', '--test_reverse', dest='test_reverse', action="store_true", default=False, help='Also search for reversed input string in reference' )
+  parser.add_option( '-D', '--test_dna_reverse_complement', dest='test_reverse_comp', action="store_true", default=False, help='Also search for the DNA reverse complement of input string' )
   parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' )
   parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' )
   parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' )
   parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' )
   parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout'  )
   (options, args) = parser.parse_args()
+
+  revcompl = lambda x: ''.join([{'A':'T','C':'G','G':'C','T':'A','a':'t','c':'g','g':'c','t':'a','N':'N','n':'n'}[B] for B in x][::-1])
+  def test_rcomplement(seq, target):
+    if options.test_reverse_comp:
+      try:
+        comp = revcompl(seq)
+        return comp in target
+      except:
+        pass
+    return False
+
+  def test_reverse(seq,target):  # True-ish when -B/--test_reverse is set and the reversed seq is a substring of target
+    return options.test_reverse and seq and seq[::-1] in target  # short-circuits: yields the first falsy operand (option off, or empty seq), otherwise the bool result of the substring test
+  
   # Input files
   if options.input != None:
     try:
@@ -116,7 +132,7 @@
         target = target_string if not options.reverse_find else search_string
         if options.debug: 
           print >> sys.stderr, "in: %s %s %s" % (search,search in target,target)
-        if search in target:
+        if search in target or test_reverse(search,target) or test_rcomplement(search,target):
           found = True
           if annotate:
             annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns])
--- a/find_in_reference.xml	Wed Aug 13 15:01:33 2014 -0500
+++ b/find_in_reference.xml	Thu Nov 13 14:09:50 2014 -0600
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="find_in_reference" name="find in reference" version="0.0.2">
+<tool id="find_in_reference" name="find in reference" version="0.0.3">
   <description>filter peptides that are present in proteins</description>
   <command interpreter="python">find_in_reference.py  --input "$input" 
   --reference "$reference" 
@@ -7,7 +7,7 @@
     --input_column $column.input_column
     --reference_column $column.reference_column
   #end if
-  $case_insensitive $reverse_find
+  $case_insensitive $reverse_find $test_reverse $test_dna_reverse_complement
   #if 'novel' in $outputs.__str__ or not 'found' in $outputs.__str__:
     --output "$novel"
   #end if
@@ -44,6 +44,8 @@
     </conditional>
     <param name="case_insensitive" type="boolean" truevalue="--case_insensitive" falsevalue="" checked="false" label="Ignore case when comparing"/>
     <param name="reverse_find" type="boolean" truevalue="--reverse_find" falsevalue="" checked="false" label="reverse search: find the reference in the input" />
+    <param name="test_reverse" type="boolean" truevalue="--test_reverse" falsevalue="" checked="false" label="Also search for reversed input string in the reference" />
+    <param name="test_dna_reverse_complement" type="boolean" truevalue="--test_dna_reverse_complement" falsevalue="" checked="false" label="Also search for the DNA reverse complement of the input in the reference" />
     <param name="outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
       <option value="novel" selected="true">lines with no match in reference</option>
       <option value="found">lines with match in reference</option>