Repository 'find_subsequences'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/find_subsequences

Changeset 1:d882a0a75759 (2015-04-10)
Previous changeset 0:7f39014f9404 (2015-03-20)
Commit message:
Uploaded
modified:
find_subsequences.py
find_subsequences.xml
tool_dependencies.xml
added:
test-data/find_subsequences_advanced_result2.bed
test-data/find_subsequences_advanced_result3.bed
b
diff -r 7f39014f9404 -r d882a0a75759 find_subsequences.py
--- a/find_subsequences.py Fri Mar 20 06:23:17 2015 -0400
+++ b/find_subsequences.py Fri Apr 10 06:49:30 2015 -0400
[
@@ -10,7 +10,7 @@
 
 choices = ['embl', 'fasta', 'fastq-sanger', 'fastq', 'fastq-solexa', 'fastq-illumina', 'genbank', 'gb']
 
-def find_pattern(seqs, pattern, outfile_path):
+def find_pattern(seqs, pattern, outfile_path, strand):
     """
     Finds all occurrences of a pattern in a given sequence.
     Outputs sequence ID, start and end position of the pattern.
@@ -23,8 +23,10 @@
 
     with open(outfile_path, 'w+') as outfile:
         for seq in seqs:
-            search_func(seq, pattern, outfile)
-            search_func(seq, rev_compl, outfile, '-')
+            if strand in ['both', 'forward']:
+                search_func(seq, pattern, outfile)
+            if strand in ['both', 'reverse']:
+                search_func(seq, rev_compl, outfile, '-')
 
 
 def simple_pattern_search(sequence, pattern, outfile, strand='+'):
@@ -50,12 +52,13 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--input' , required=True)
-    parser.add_argument('-o', '--output' , required=True)
-    parser.add_argument('-p', '--pattern' , required=True)
+    parser.add_argument('-i', '--input', required=True)
+    parser.add_argument('-o', '--output', required=True)
+    parser.add_argument('-p', '--pattern', required=True)
+    parser.add_argument('--strand', choices=['both', 'forward', 'reverse'], default='both')
     parser.add_argument('-f', '--format', default="fasta", choices=choices)
     args = parser.parse_args()
 
     with open(args.input) as handle:
-        find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output )
+        find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output, args.strand )
 
b
diff -r 7f39014f9404 -r d882a0a75759 find_subsequences.xml
--- a/find_subsequences.xml Fri Mar 20 06:23:17 2015 -0400
+++ b/find_subsequences.xml Fri Apr 10 06:49:30 2015 -0400
[
@@ -1,4 +1,4 @@
-<tool id="bg_find_subsequences" name="Nucleotide subsequence search" version="0.1">
+<tool id="bg_find_subsequences" name="Nucleotide subsequence search" version="0.2">
     <description>providing regions in BED format</description>
     <requirements>
         <requirement type="package" version="1.65">biopython</requirement>
@@ -6,14 +6,15 @@
     <command interpreter="python">
     <![CDATA[
         find_subsequences.py
-            --input $input
-            --output $output
+            --input "${input}"
+            --output "${output}"
             --pattern "$pattern_conditional.pattern"
             #if $input.ext == 'fasta':
                 --format 'fasta'
             #else:
                 --format 'fastq'
             #end if
+            --strand '$strand'
     ]]>
     </command>
     <inputs>
@@ -248,6 +249,11 @@
                 </param>
             </when>
         </conditional>
+        <param name="strand" type="select" label="Search pattern on">
+            <option value='both'>both strands</option>
+            <option value='forward'>forward strand</option>
+            <option value='reverse'>reverse strand</option>
+        </param>
     </inputs>
     <outputs>
         <data format="bed" name="output" />
@@ -271,6 +277,20 @@
             <param name="pattern_conditional_select" value="user"/>
             <output name="output" file="find_subsequences_advanced_result1.bed" ftype="bed"/>
         </test>
+        <test>
+            <param name="input" value="find_subsequences_input1.fasta" ftype="fasta"/>
+            <param name="pattern" value="atnncg"/>
+            <param name="strand" value="reverse"/>
+            <param name="pattern_conditional_select" value="user"/>
+            <output name="output" file="find_subsequences_advanced_result2.bed" ftype="bed"/>
+        </test>
+        <test>
+            <param name="input" value="find_subsequences_input1.fasta" ftype="fasta"/>
+            <param name="pattern" value="atnncg"/>
+            <param name="strand" value="forward"/>
+            <param name="pattern_conditional_select" value="user"/>
+            <output name="output" file="find_subsequences_advanced_result3.bed" ftype="bed"/>
+        </test>
     </tests>
     <help>
 <![CDATA[
b
diff -r 7f39014f9404 -r d882a0a75759 test-data/find_subsequences_advanced_result2.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/find_subsequences_advanced_result2.bed Fri Apr 10 06:49:30 2015 -0400
b
@@ -0,0 +1,1 @@
+reverse_advanced 9 15 reverse_advanced -
b
diff -r 7f39014f9404 -r d882a0a75759 test-data/find_subsequences_advanced_result3.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/find_subsequences_advanced_result3.bed Fri Apr 10 06:49:30 2015 -0400
b
@@ -0,0 +1,1 @@
+forward_advanced 9 15 forward_advanced +
b
diff -r 7f39014f9404 -r d882a0a75759 tool_dependencies.xml
--- a/tool_dependencies.xml Fri Mar 20 06:23:17 2015 -0400
+++ b/tool_dependencies.xml Fri Apr 10 06:49:30 2015 -0400
b
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="biopython" version="1.65">
-      <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="biopython" version="1.65">
+        <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>