# HG changeset patch # User Jim Johnson # Date 1415909390 21600 # Node ID c4fd2ea4f988424d2af5183eaaa97e4a768ccace # Parent e83e0ce8fb68ca1da0fc59519f7a5e012fae79af Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA) diff -r e83e0ce8fb68 -r c4fd2ea4f988 find_in_reference.py --- a/find_in_reference.py Wed Aug 13 15:01:33 2014 -0500 +++ b/find_in_reference.py Thu Nov 13 14:09:50 2014 -0600 @@ -42,12 +42,28 @@ parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' ) + parser.add_option( '-B', '--test_reverse', dest='test_reverse', action="store_true", default=False, help='Also search for reversed input string in reference' ) + parser.add_option( '-D', '--test_dna_reverse_complement', dest='test_reverse_comp', action="store_true", default=False, help='Also search for the DNA reverse complement of input string' ) parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) (options, args) = parser.parse_args() + + revcompl = lambda x: ''.join([{'A':'T','C':'G','G':'C','T':'A','a':'t','c':'g','g':'c','t':'a','N':'N','n':'n'}[B] for B in x][::-1]) + def test_rcomplement(seq, target): + if options.test_reverse_comp: + try: + comp = revcompl(seq) + return comp in target + except: + pass + return False + + def test_reverse(seq,target): + return options.test_reverse and seq and seq[::-1] in target + # Input files if options.input != None: try: @@ -116,7 +132,7 @@ target = target_string if not options.reverse_find else search_string if options.debug: print >> sys.stderr, "in: %s %s %s" % (search,search in target,target) - if search in target: + if search in target or test_reverse(search,target) or test_rcomplement(search,target): found = True if annotate: annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) diff -r e83e0ce8fb68 -r c4fd2ea4f988 find_in_reference.xml --- a/find_in_reference.xml Wed Aug 13 15:01:33 2014 -0500 +++ b/find_in_reference.xml Thu Nov 13 14:09:50 2014 -0600 @@ -1,5 +1,5 @@ - + filter peptides that are present in proteins find_in_reference.py --input "$input" --reference "$reference" @@ -7,7 +7,7 @@ --input_column $column.input_column --reference_column $column.reference_column #end if - $case_insensitive $reverse_find + $case_insensitive $reverse_find $test_reverse $test_dna_reverse_complement #if 'novel' in $outputs.__str__ or not 'found' in $outputs.__str__: --output "$novel" #end if @@ -44,6 +44,8 @@ + +