Repository 'seq_filter_by_mapping'
hg clone https://toolshed.g2.bx.psu.edu/repos/peterjc/seq_filter_by_mapping

Changeset 2:48e71dfd51b3 (2017-05-10)
Previous changeset 1:8ff0ac66f1a3 (2015-05-13) | Next changeset 3:481b0a925e66 (2017-05-17)
Commit message:
v0.0.5 Depend on Biopython 1.67 from Tool Shed or (Bio)conda
modified:
tools/seq_filter_by_mapping/README.rst
tools/seq_filter_by_mapping/seq_filter_by_mapping.py
tools/seq_filter_by_mapping/seq_filter_by_mapping.xml
tools/seq_filter_by_mapping/tool_dependencies.xml
b
diff -r 8ff0ac66f1a3 -r 48e71dfd51b3 tools/seq_filter_by_mapping/README.rst
--- a/tools/seq_filter_by_mapping/README.rst Wed May 13 11:08:58 2015 -0400
+++ b/tools/seq_filter_by_mapping/README.rst Wed May 10 13:16:44 2017 -0400
b
@@ -1,7 +1,7 @@
 Galaxy tool to filter FASTA, FASTQ or SFF sequences by SAM/BAM mapping
 ======================================================================
 
-This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute
+This tool is copyright 2014-2017 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
 
@@ -66,6 +66,10 @@
 v0.0.4  - Use the ``format_source=...`` tag.
         - Reorder XML elements (internal change only).
         - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
+v0.0.5  - Python script cleanups (internal change only).
+        - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
+        - Use ``<command detect_errors="aggressive">`` (internal change only).
+        - Single quote command line arguments (internal change only).
 ======= ======================================================================
 
 
@@ -82,17 +86,17 @@
 Planemo commands (which require that you have set your Tool Shed access details in
 ``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
 
-    $ planemo shed_upload --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    $ planemo shed_update -t testtoolshed --check_diff tools/seq_filter_by_mapping/
     ...
 
 or::
 
-    $ planemo shed_upload --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    $ planemo shed_update -t toolshed --check_diff tools/seq_filter_by_mapping/
     ...
 
 To just build and check the tar ball, use::
 
-    $ planemo shed_upload --tar_only  ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    $ planemo shed_upload --tar_only tools/seq_filter_by_mapping/
     ...
     $ tar -tzf shed_upload.tar.gz
     test-data/SRR639755_mito_pairs.fastq.gz
b
diff -r 8ff0ac66f1a3 -r 48e71dfd51b3 tools/seq_filter_by_mapping/seq_filter_by_mapping.py
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Wed May 13 11:08:58 2015 -0400
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.py Wed May 10 13:16:44 2017 -0400
[
b'@@ -18,17 +18,15 @@\n \n Use -v or --version to get the version, -h or --help for help.\n """\n+\n import os\n-import sys\n import re\n import subprocess\n+import sys\n+\n from optparse import OptionParser\n \n-def sys_exit(msg, err=1):\n-    sys.stderr.write(msg.rstrip() + "\\n")\n-    sys.exit(err)\n-\n-#Parse Command Line\n+# Parse Command Line\n usage = """Use as follows:\n \n $ python seq_filter_by_mapping.py [options] mapping.sam/bam [more mappings]\n@@ -64,7 +62,7 @@\n options, args = parser.parse_args()\n \n if options.version:\n-    print "v0.0.3"\n+    print "v0.0.5"\n     sys.exit(0)\n \n in_file = options.input\n@@ -74,27 +72,27 @@\n pair_mode = options.pair_mode\n \n if in_file is None or not os.path.isfile(in_file):\n-    sys_exit("Missing input file: %r" % in_file)\n+    sys.exit("Missing input file: %r" % in_file)\n if out_positive_file is None and out_negative_file is None:\n-    sys_exit("Neither output file requested")\n+    sys.exit("Neither output file requested")\n if seq_format is None:\n-    sys_exit("Missing sequence format")\n+    sys.exit("Missing sequence format")\n if pair_mode not in ["lax", "strict"]:\n-    sys_exit("Pair mode argument should be \'lax\' or \'strict\', not %r" % pair_mode)\n+    sys.exit("Pair mode argument should be \'lax\' or \'strict\', not %r" % pair_mode)\n for mapping in args:\n     if not os.path.isfile(mapping):\n-        sys_exit("Mapping file %r not found" % mapping)\n+        sys.exit("Mapping file %r not found" % mapping)\n if not args:\n-    sys_exit("At least one SAM/BAM mapping file is required")\n+    sys.exit("At least one SAM/BAM mapping file is required")\n \n \n-#Cope with three widely used suffix naming convensions,\n-#Illumina: /1 or /2\n-#Forward/revered: .f or .r\n-#Sanger, e.g. 
.p1k and .q1k\n-#See http://staden.sourceforge.net/manual/pregap4_unix_50.html\n-#re_f = re.compile(r"(/1|\\.f|\\.[sfp]\\d\\w*)$")\n-#re_r = re.compile(r"(/2|\\.r|\\.[rq]\\d\\w*)$")\n+# Cope with three widely used suffix naming convensions,\n+# Illumina: /1 or /2\n+# Forward/revered: .f or .r\n+# Sanger, e.g. .p1k and .q1k\n+# See http://staden.sourceforge.net/manual/pregap4_unix_50.html\n+# re_f = re.compile(r"(/1|\\.f|\\.[sfp]\\d\\w*)$")\n+# re_r = re.compile(r"(/2|\\.r|\\.[rq]\\d\\w*)$")\n re_suffix = re.compile(r"(/1|\\.f|\\.[sfp]\\d\\w*|/2|\\.r|\\.[rq]\\d\\w*)$")\n assert re_suffix.search("demo.f")\n assert re_suffix.search("demo.s1")\n@@ -107,6 +105,7 @@\n assert re_suffix.search("demo.q1")\n assert re_suffix.search("demo.q1lk")\n \n+\n def clean_name(name):\n     """Remove suffix."""\n     match = re_suffix.search(name)\n@@ -117,6 +116,8 @@\n     else:\n         # Nothing to do\n         return name\n+\n+\n assert clean_name("foo/1") == "foo"\n assert clean_name("foo/2") == "foo"\n assert clean_name("bar.f") == "bar"\n@@ -124,20 +125,22 @@\n assert clean_name("baz.p1") == "baz"\n assert clean_name("baz.q2") == "baz"\n \n-mapped_chars = { \'>\' :\'__gt__\',\n-                 \'<\' :\'__lt__\',\n-                 "\'" :\'__sq__\',\n-                 \'"\' :\'__dq__\',\n-                 \'[\' :\'__ob__\',\n-                 \']\' :\'__cb__\',\n-                 \'{\' :\'__oc__\',\n-                 \'}\' :\'__cc__\',\n-                 \'@\' : \'__at__\',\n-                 \'\\n\' : \'__cn__\',\n-                 \'\\r\' : \'__cr__\',\n-                 \'\\t\' : \'__tc__\',\n-                 \'#\' : \'__pd__\'\n-                 }\n+mapped_chars = {\n+    \'>\': \'__gt__\',\n+    \'<\': \'__lt__\',\n+    "\'": \'__sq__\',\n+    \'"\': \'__dq__\',\n+    \'[\': \'__ob__\',\n+    \']\': \'__cb__\',\n+    \'{\': \'__oc__\',\n+    \'}\': \'__cc__\',\n+    \'@\': \'__at__\',\n+    \'\\n\': \'__cn__\',\n+    \'\\r\': \'__cr__\',\n+    \'\\t\': \'__tc__\',\n+    
\'#\': \'__pd__\',\n+}\n+\n \n def load_mapping_ids(filename, pair_mode, ids):\n     """Parse SAM/BAM file, updating given set of ids.\n@@ -189,7 +192,7 @@\n         if child.returncode:\n             msg = "Error %i from \'samtools view %s\'\\n%s" % (child.returncode,\n                                                             filename, stderr)\n-            sys_exit(msg.strip(), child.returncode)\n+            sys.exit(msg.strip(), child.returncode)\n     else:\n         handle.close()\n \n@@'..b'sitive_file is not None:\n+    elif pos_file is not None:\n         print "Generating matching FASTQ file"\n-        positive_handle = open(out_positive_file, "w")\n+        positive_handle = open(pos_file, "w")\n         for title, seq, qual in FastqGeneralIterator(handle):\n-            if clean_name(title.split(None, 1)[0]) in ids:\n+            if clean_name(title.split(None, 1)[0]) in wanted:\n                 positive_handle.write("@%s\\n%s\\n+\\n%s\\n" % (title, seq, qual))\n                 pos_count += 1\n             else:\n                 neg_count += 1\n         positive_handle.close()\n-    elif out_negative_file is not None:\n+    elif neg_file is not None:\n         print "Generating non-matching FASTQ file"\n-        negative_handle = open(out_negative_file, "w")\n+        negative_handle = open(neg_file, "w")\n         for title, seq, qual in FastqGeneralIterator(handle):\n-            if clean_name(title.split(None, 1)[0]) in ids:\n+            if clean_name(title.split(None, 1)[0]) in wanted:\n                 pos_count += 1\n             else:\n                 negative_handle.write("@%s\\n%s\\n+\\n%s\\n" % (title, seq, qual))\n@@ -328,48 +332,48 @@\n     try:\n         from Bio.SeqIO.SffIO import SffIterator, SffWriter\n     except ImportError:\n-        sys_exit("SFF filtering requires Biopython 1.54 or later")\n+        sys.exit("SFF filtering requires Biopython 1.54 or later")\n \n     try:\n         from Bio.SeqIO.SffIO import 
ReadRocheXmlManifest\n     except ImportError:\n-        #Prior to Biopython 1.56 this was a private function\n+        # Prior to Biopython 1.56 this was a private function\n         from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest\n \n-    in_handle = open(in_file, "rb") #must be binary mode!\n+    in_handle = open(in_file, "rb")  # must be binary mode!\n     try:\n         manifest = ReadRocheXmlManifest(in_handle)\n     except ValueError:\n         manifest = None\n \n-    #This makes two passes though the SFF file with isn\'t so efficient,\n-    #but this makes the code simple.\n+    # This makes two passes though the SFF file with isn\'t so efficient,\n+    # but this makes the code simple.\n     pos_count = neg_count = 0\n-    if out_positive_file is not None:\n-        out_handle = open(out_positive_file, "wb")\n+    if pos_file is not None:\n+        out_handle = open(pos_file, "wb")\n         writer = SffWriter(out_handle, xml=manifest)\n-        in_handle.seek(0) #start again after getting manifest\n-        pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in ids)\n+        in_handle.seek(0)  # start again after getting manifest\n+        pos_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) in wanted)\n         out_handle.close()\n-    if out_negative_file is not None:\n-        out_handle = open(out_negative_file, "wb")\n+    if neg_file is not None:\n+        out_handle = open(neg_file, "wb")\n         writer = SffWriter(out_handle, xml=manifest)\n-        in_handle.seek(0) #start again\n-        neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in ids)\n+        in_handle.seek(0)  # start again\n+        neg_count = writer.write_file(rec for rec in SffIterator(in_handle) if clean_name(rec.id) not in wanted)\n         out_handle.close()\n-    #And we\'re done\n+    # And we\'re done\n     
in_handle.close()\n     return pos_count, neg_count\n \n \n-if seq_format.lower()=="sff":\n+if seq_format.lower() == "sff":\n     sequence_filter = sff_filter\n-elif seq_format.lower()=="fasta":\n+elif seq_format.lower() == "fasta":\n     sequence_filter = fasta_filter\n elif seq_format.lower().startswith("fastq"):\n     sequence_filter = fastq_filter\n else:\n-    sys_exit("Unsupported file type %r" % seq_format)\n+    sys.exit("Unsupported file type %r" % seq_format)\n \n pos_count, neg_count = sequence_filter(in_file, out_positive_file, out_negative_file, ids)\n print("%i mapped and %i unmapped reads." % (pos_count, neg_count))\n'
b
diff -r 8ff0ac66f1a3 -r 48e71dfd51b3 tools/seq_filter_by_mapping/seq_filter_by_mapping.xml
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml Wed May 13 11:08:58 2015 -0400
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml Wed May 10 13:16:44 2017 -0400
b
@@ -1,28 +1,23 @@
-<tool id="seq_filter_by_mapping" name="Filter sequences by mapping" version="0.0.4">
+<tool id="seq_filter_by_mapping" name="Filter sequences by mapping" version="0.0.5">
     <description>from SAM/BAM file</description>
     <requirements>
-        <requirement type="package" version="1.64">biopython</requirement>
-        <requirement type="python-module">Bio</requirement>
-        <requirement type="binary">samtools</requirement>
+        <requirement type="package" version="1.67">biopython</requirement>
         <requirement type="package" version="0.1.19">samtools</requirement>
     </requirements>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
-    <version_command interpreter="python">seq_filter_by_mapping.py --version</version_command>
-    <command interpreter="python">
-seq_filter_by_mapping.py -i "$input_file" -f "$input_file.ext" -m $pair_mode
+    <version_command>
+python $__tool_directory__/seq_filter_by_mapping.py --version
+    </version_command>
+    <command detect_errors="aggressive">
+python $__tool_directory__/seq_filter_by_mapping.py -i '$input_file' -f '$input_file.ext' -m $pair_mode
 #if $output_choice_cond.output_choice=="both"
- -p $output_pos -n $output_neg
+ -p '$output_pos' -n '$output_neg'
 #elif $output_choice_cond.output_choice=="pos"
- -p $output_pos
+ -p '$output_pos'
 #elif $output_choice_cond.output_choice=="neg"
- -n $output_neg
+ -n '$output_neg'
 #end if
 ## Now loop over all the mapping files
-#for i in $mapping_file#${i} #end for#
+#for i in $mapping_file#'${i}' #end for#
     </command>
     <inputs>
         <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." />
b
diff -r 8ff0ac66f1a3 -r 48e71dfd51b3 tools/seq_filter_by_mapping/tool_dependencies.xml
--- a/tools/seq_filter_by_mapping/tool_dependencies.xml Wed May 13 11:08:58 2015 -0400
+++ b/tools/seq_filter_by_mapping/tool_dependencies.xml Wed May 10 13:16:44 2017 -0400
b
@@ -1,9 +1,9 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="biopython" version="1.64">
-        <repository changeset_revision="5477a05cc158" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="biopython" version="1.67">
+        <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="samtools" version="0.1.19">
-        <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="c9bd782f5342" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>